#!/bin/sh

# Copyright 2007, 2008, 2009, 2010, 2011 Michael Creel
# Copyright 2010 Robert G. Petry

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# set this to the network you'd like to use for the cluster

# make_pelican: this script allows you to make a custom version
# of the PelicanHPC live CD/USB image for creation of high performance
# computing clusters. You need to install live-build, debootstrap and rsync to use
# it. These packages are installed on PelicanHPC released images, so you can use
# this script on PelicanHPC. It can be used on any other GNU/Linux distro as long
# as these packages are installed.
# See http://pareto.uab.es/mcreel/PelicanHPC for more information.
# Version 2.7
# * octave 3.6.0 from source
# * dynare 4.2.4
# * add pelicanhpc.pdf to desktop

#### packages to include on image - place names of packages you want here ####
# The list is written to ./pelicanhpc.list, selected with "lb config -p" and
# later moved into the live-build configuration tree.
cat <<PACKAGELIST > pelicanhpc.list
# basic stuff needed for cluster setup
dnsmasq syslinux nfs-kernel-server nfs-common tftpd-hpa xinetd ssh
# configuration and tools
wget bzip2 dialog less net-tools rsync fping screen
make htop fail2ban locales console-common
byobu gqview vim subversion debootstrap pssh lm-sensors joe nano firmware-linux live-build dsh build-essential
# X stuff
xorg xfce4 konsole konqueror ksysguard ksysguardd okular kate gnuplot iceweasel
# libraries for building source code
libx11-dev build-essential gfortran texinfo libatlas-base-dev libarpack2-dev libglpk-dev libfftw3-dev libsuitesparse-dev libreadline-dev libpcre3-dev libgl1-mesa-dev libfreetype6-dev libglu1-mesa-dev libfltk1.1-dev libftgl-dev libgraphicsmagick1-dev libgraphicsmagick++1-dev libboost-dev python-dev
# octave
# octave3.2-headers octave3.2 octave3.2-info octave3.2-doc
# openmpi
libopenmpi-dev openmpi-bin openmpi-doc
# dynare
# dynare dynare-common dynare-doc
# gretl
gretl gretl-data gretl-doc gretl-common
# ganglia, firewall
#ganglia-monitor gmetad libganglia1 ganglia-webfrontend
#librrds-perl librrd2-dev
#apache2 php5 rrdtool php5-gd
# WOL
etherwake beep debconf-utils arno-iptables-firewall
# Python
python-scipy python-matplotlib python-numpy ipython python-mpmath python-gmpy
PACKAGELIST

################## OPTIONS FOR IMAGE ################
ARCHITECTURE="amd64"
KERNEL="amd64"
#ARCHITECTURE="i386"
#KERNEL="686"
#IMAGETYPE="iso"
IMAGETYPE="iso-hybrid"
DISTRIBUTION="squeeze"
MIRROR="de"
IMAGENAME="pelicanhpc-custom.iso"

################## pelicanhome ###########################
# uncomment the following to get the files on the official releases
# or replace the URL or the contents of ./pelicanhome with your own
# files to add customized content
wget http://idea.uab.es/mcreel/PelicanHPC/download/pelicanhome.tar.bz2
tar xfj pelicanhome.tar.bz2
sync
##########################################################

######################################################################################
##          !!!!!  END OF NORMAL CONFIGURATION OPTIONS !!!!!!!!!                    ##
##   leave the rest of this alone unless you really know what you're up to          ##
######################################################################################
date
THISDIR="$(pwd)"

# get default splash screen if none is present
if [ ! -e "$THISDIR/splash.rle" ]; then
  wget http://pareto.uab.es/mcreel/PelicanHPC/download/splash.rle
fi

install -d "$ARCHITECTURE"
install -d pelicanhome
install -d packages
cp make_pelican-* pelicanhome

# Everything below works on paths relative to the build directory, so stop
# here rather than run "lb config" / "lb clean" somewhere else.
cd "$THISDIR/$ARCHITECTURE" || {
  echo "make_pelican: cannot enter $THISDIR/$ARCHITECTURE" >&2
  exit 1
}

# frontend configuration
lb config \
  -p "pelicanhpc.list" \
  --apt apt --apt-recommends disabled \
  --binary-indices disabled \
  -a "$ARCHITECTURE" \
  -b "$IMAGETYPE" \
  -d "$DISTRIBUTION" \
  -k "$KERNEL" \
  --mirror-binary http://ftp."$MIRROR".debian.org/debian/ \
  --mirror-chroot http://ftp."$MIRROR".debian.org/debian/ \
  --mirror-bootstrap http://ftp."$MIRROR".debian.org/debian/ \
  --mirror-binary-security http://security.debian.org/ \
  --mirror-chroot-security http://security.debian.org \
  --hostname pelican \
  --iso-volume PelicanHPC \
  --syslinux-timeout 20 \
  --syslinux-menu enabled \
  --bootappend-live "noautologin noxautologin quickreboot" \
  --archive-areas "main contrib non-free"
lb clean

# SYSLINUX SPLASH SCREEN
install -d config/binary_syslinux/
cp "$THISDIR"/splash.rle config/binary_syslinux/splash.rle
lb config --syslinux-splash "config/binary_syslinux/splash.rle"

########## make directories on chroot #########
install -d config/chroot_local-includes/usr/bin
# Must be the same directory the package list is moved into below
# (it was previously misspelled "chroot_local-packagelists").
install -d config/chroot_local-packageslists
install -d config/chroot_local-packages
install -d config/chroot_local-includes/etc/skel

########## copy stuff to be added to chroot ######
mv "$THISDIR"/pelicanhpc.list config/chroot_local-packageslists/pelicanhpc.list
# NOTE: the destination argument of this rsync is the first word on the next
# line of the file, hence the trailing continuation backslash.
rsync -az "$THISDIR"/pelicanhome/ \
config/chroot_local-includes/etc/skel rsync -az "$THISDIR"/packages/ config/chroot_local-packages # chroot hook: configure initrd for netboot of compute nodes, and install some software from source cat < config/chroot_local-hooks/script.sh #!/bin/bash # note: can't actually build the initrd here, as it will be missing modules # needed when running on a different system update-initramfs -u -k all install -d /var/lib/tftpboot cp -a /etc/initramfs-tools/ /etc/initramfs-pxe sed -i 's/BOOT=local/BOOT=nfs/g' /etc/initramfs-pxe/initramfs.conf # get, build, and install Open MPI #wget http://www.open-mpi.org/software/ompi/v1.4/downloads/openmpi-$OPENMPIVERSION.tar.bz2 #tar xfj openmpi-$OPENMPIVERSION.tar.bz2 #cd openmpi-$OPENMPIVERSION/ #./configure #make -j8 all #make install #sync #cd ../ #rm -R -f openmpi* #ldconfig #sync # petry: -disable ganglia daemons by default # -disable dnsmasq as we start that manually and we # don't want it running on the nodes (messes with ganglia) # -disable avahi-daemon, yet another dns-caching mechanism which messes with ganglia # -disable firewall update-rc.d -f ganglia-monitor remove update-rc.d -f gmetad remove update-rc.d -f apache2 remove update-rc.d -f dnsmasq remove update-rc.d -f avahi-daemon remove update-rc.d -f arno-iptables-firewall remove if [ -f "/etc/skel/make_pelicanhome.sh" ]; then cd /etc/skel sh ./make_pelicanhome.sh fi CHROOTHOOK chmod +x config/chroot_local-hooks/script.sh ######## WRITE THE PELICAN SETUP SCRIPTS ############# cat <<01BOOTSETUP > config/chroot_local-includes/usr/bin/pelican_boot_setup #!/bin/bash PATH="/bin:/sbin:/usr/bin:/usr/sbin" export PATH TMP="/tmp/pelican_setup.tmp\$\$" DIALOG="dialog" TITLE="Pelican Setup" exec >/dev/console &1 NORMAL="" RED="" GREEN="" YELLOW="" BLUE="" MAGENTA="" CYAN="" WHITE="" bailout(){ # set user password # use /home/user/pw if it exists rather than asking for it. if [ ! "\$PASSWORD_PROMPT" = "NO" ]; then \$DIALOG --title "\$TITLE" --inputbox "Welcome to PelicanHPC. 
Please type in a password, and then press to continue" 15 50 "PleaseChangeMe!" 2>/home/user/pw chmod 644 /home/user/pw fi read PASSWORD /etc/hosts.tmp mv /etc/hosts.tmp /etc/hosts hostname "\$NODEHOSTNAME" chown -R user.user /home/user echo "" echo -n "" echo "\${GREEN}Welcome to \${CYAN}PelicanHPC\${GREEN}!" echo "" echo "To log in, enter \${YELLOW}user\${GREEN} as the username, and the password you just specified." echo "After you're logged in, you can:" echo "" echo "\${BLUE}* create a cluster: type \${YELLOW}pelican_setup\${GREEN}" echo "\${BLUE}* enter a desktop environment: type \${YELLOW}startx\${GREEN}" echo "" echo "For more information, visit http://pelicanhpc.org. Have fun!\${NORMAL}" echo "" #echo "alias mpirun='mpirun -x PRELOAD=libmpi.so'" >> /home/user/.bashrc # pelican_config: a switch to run a local script and pelican_setup if [ "\$RUN_FRONTEND_BOOT" = "YES" ]; then if [ -e /home/user/pelican_frontend_boot ]; then /home/user/pelican_frontend_boot fi fi if [ "\$RUN_PELICAN_SETUP" = "YES" ]; then exec pelican_setup fi exit \$1 } bailout2(){ read PASSWORD /etc/hosts.tmp mv /etc/hosts.tmp /etc/hosts hostname "\$NODEHOSTNAME" echo "All done setting up this PelicanHPC compute node" echo "" echo -n "" echo "\${GREEN}This is a \${CYAN}PelicanHPC \${GREEN}compute node. It is part of a cluster of computers that is doing some \${YELLOW}REALLY important stuff\${GREEN}." echo "" echo "Please don't try to use it, and \${RED}DON'T TURN IT OFF!" echo "" echo "\${GREEN}THANKS!\${BLUE}" # petry: Added a switch to run a local node script and beep if [ "\$RUN_NODE_BOOT" = "YES" ]; then if [ -e /home/user/pelican_node_boot ]; then /home/user/pelican_node_boot fi fi if [ ! -z "\$NODE_BEEP" ]; then beep \$NODE_BEEP fi exit \$1 } echo "Welcome to PelicanHPC!" 
# things for both frontend and nodes echo "" > /etc/motd echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config /etc/init.d/procps restart # support software compiled using older Open MPI ln -s /usr/lib/openmpi/lib/libopen-rte.so.0.0.0 /usr/lib/openmpi/lib/liborte.so.0 ln -s /usr/lib/openmpi/lib/libopen-pal.so.0.0.0 /usr/lib/openmpi/lib/libopal.so.0 # test for master or compute node: # petry: changed this just to look for the words 'nfsroot' rather than # the ip address \$PELICAN_NETWORK since 'nfsroot' will also only be there at # boot if this is a compute node. That way we aren't tied into a particular # ip before we read the config file. computenode=\`dmesg | grep -c nfsroot\` if [ \$computenode = 0 ]; then # master node echo "making initrd.img for netboot, this will take a little while" mkinitramfs -d /etc/initramfs-pxe -o /var/lib/tftpboot/initrd.img & mkdir -p /home rm -f "\$TMP" # petry: Search for a device with the label "PELHOME" to mount as the home # directory should it exist. Note some filesystems (vfat) only allow 11 # character labels. Also note that the cdrom already has label PelicanHPC. # This mounting of a home directory in this way is similar to the idea # of a persistent partition with which it could perhaps be replaced. # Uppercase was chosen for the label since vfat seems to require this. HOMELOCATION=\$(blkid -L "PELHOME") # petry: If found, mount the directory and check whether the configuration # file expressly overrides asking about mounting home. if [ ! -z "\$HOMELOCATION" ]; then mount \$HOMELOCATION /home if [ -e /home/user/pelican_config ]; then AUTOMOUNT_HOME_IS_NO=\$(grep -v "#" /home/user/pelican_config | grep 'AUTOMOUNT_HOME_PROMPT="NO"') fi if [ -z "\$AUTOMOUNT_HOME_IS_NO" ]; then MESSAGE="A pelican home directory (identified by the device label PELHOME) was found on the device \$HOMELOCATION . Do you want to mount it as your home directory? 
(Doing so will cause pelican HPC to use any existing pelican_config file to override default behaviour during startup.)" if ! \$DIALOG --title "\$TITLE" --yesno "\$MESSAGE" 10 50 ; then umount /home HOMELOCATION="" fi fi fi if [ -z "\$HOMELOCATION" ]; then \$DIALOG --title "\$TITLE" --inputbox "Do NOT type anything into this input box, it is only for the convenience of advanced users who have experience using PelicanHPC, and who have read the documentation to learn what it is used for. Press to continue" 15 50 "-t tmpfs -o size=400m tmpfs" 2>"\$TMP" read HOMELOCATION <"\$TMP" ; rm -f "\$TMP" mount \$HOMELOCATION /home fi install -d /home/user # petry: default variable values FRONTEND_SCRATCH_MOUNT="/usr/local/scratch" # petry: Load the configuration file if it exists if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi # petry: Check for a device with the label "PELSCRATCH" and mount it at # /usr/local/scratch . While /home is exported to all the nodes, it could # be useful to have local scratch space both here on the frontend and also # on the nodes. if [ "\$DETECT_FRONTEND_SCRATCH" = "YES" ]; then SCRATCHLOCATION=\$(blkid -t LABEL="PELSCRATCH" -o device -l) fi if [ ! -z "\$SCRATCHLOCATION" ]; then mkdir -p \$FRONTEND_SCRATCH_MOUNT mount \$SCRATCHLOCATION \$FRONTEND_SCRATCH_MOUNT fi # petry: add a link to shutdown the nodes in the halt and reboot runlevels (0) # and (6) respectively. if [ "\$AUTO_NODE_SHUTDOWN" = "YES" ]; then ln -s /usr/bin/pelican_nodes /etc/rc0.d/K10pelican_nodes ln -s /usr/bin/pelican_nodes /etc/rc6.d/K10pelican_nodes fi # petry: Check whether to prompt for copying files if [ "\$COPY_FILE_PROMPT" = "NO" ]; then bailout fi MESSAGE="Copy user configuration and examples to /home/user? You should probably choose YES unless you are using a specially made version of PelicanHPC that uses a permanent storage device, and you already have a setup you would like to keep." 
\$DIALOG --defaultno --title "\$TITLE" --yesno "\$MESSAGE" 10 50 || bailout rsync -avz /etc/skel/ /home/user/ mkdir /home/user/backup # for vim backup files # Start up xfce4 install -d /home/user/.config/autostart echo "[Desktop Entry]" >> /home/user/.config/autostart/xfce4-tips-autostart.desktop echo "Hidden=true" >> /home/user/.config/autostart/xfce4-tips-autostart.desktop # echo "/usr/bin/startx" >> /home/user/.bashrc # uncomment to make entry into GUI automatic # Desktop links for tutorial and econometrics ln -s /home/user/Econometrics/econometrics_pelican.pdf /home/user/Desktop/econometrics.pdf ln -s /home/user/Tutorial/PelicanTutorial.pdf /home/user/Desktop/PelicanTutorial.pdf bailout # for the compute nodes else # petry: deleted the following, added the -p to mkdir #rm -R -f /home mkdir -p /home # petry: extract the frontend IP address from dmesg since we can't read # config file yet. PELICAN_FRONTEND=\$(dmesg | tr '\n' ' ' | sed 's/.*nfsroot=\([^:]*\):.*/\1/') mount \$PELICAN_FRONTEND:/home /home # petry: default variable values PELICAN_NETWORK="10.11.12" NODE_SCRATCH_MOUNT="/usr/local/scratch" # petry: Load the configuration file if it exists if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi # petry: shut down if we are not running setup and SHUTDOWN_IF_NOT_SETUP # is configured if [ "\$SHUTDOWN_IF_NOT_SETUP" = "YES" ]; then if [ ! -e /home/user/.running_pelican_restart_hpc ]; then shutdown -h now fi fi myip=(\`/sbin/ifconfig | grep -i "Ethernet" -A 1|grep "inet addr"|cut -d " " -f 12|cut -d ":" -f 2\`) echo "\$myip is up" > /home/user/tmp/\$myip # petry: Check for a device with the label "PELSCRATCH" and mount it at # /usr/local/scratch . While /home is exported to all the nodes, it could # be useful to have local scratch space both here on the nodes. if [ "\$DETECT_NODE_SCRATCH" = "YES" ]; then SCRATCHLOCATION=\$(blkid -t LABEL="PELSCRATCH" -o device -l) fi if [ ! 
-z "\$SCRATCHLOCATION" ]; then mkdir -p \$NODE_SCRATCH_MOUNT mount \$SCRATCHLOCATION \$NODE_SCRATCH_MOUNT fi # support software compiled using older Open MPI ln -s /usr/lib/openmpi/lib/libopen-rte.so.0.0.0 /usr/lib/openmpi/lib/liborte.so.0 ln -s /usr/lib/openmpi/lib/libopen-pal.so.0.0.0 /usr/lib/openmpi/lib/libopal.so.0 bailout2 fi 01BOOTSETUP cat <<02SETUP > config/chroot_local-includes/usr/bin/pelican_setup #!/bin/bash pelican_setup_user pelican_setup_netdevice pelican_terminalserver pelican_restart_hpc 02SETUP cat <<03USER > config/chroot_local-includes/usr/bin/pelican_setup_user #!/bin/bash bailout(){ exit \$1 } # petry: set default variable values PELICAN_NETWORK="10.11.12" MAXNODES="100" # petry: load config file (potentially override defaults). if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi cd /home/user HOME="/home/user" PKTMP="\$HOME/tmp" echo "Creating temporary directory" rm -R -f \$PKTMP mkdir \$PKTMP chown user.user \$PKTMP chmod 777 \$PKTMP # generate keys if not there if [ -d "/home/user/.ssh" ]; then echo "ssh already configured" else echo "Generating new RSA keys" ssh-keygen -q -t rsa -N "" -f "\$HOME/.ssh/id_rsa" cp \$HOME/.ssh/id_rsa.pub \$HOME/.ssh/authorized_keys chmod 600 \$HOME/.ssh/authorized_keys fi rm -f \$HOME/.ssh/known_hosts # make list of hosts to fping echo "\$PELICAN_NETWORK.2" > \$HOME/fpinghosts i=2 # petry: changed the following to -le from -lt to include MAXNODES # to be consistent with dnsmasq setup. while [ \$i -le \$MAXNODES ] do i=\`expr \$i + 1\` echo \$PELICAN_NETWORK.\$i >> \$HOME/fpinghosts done bailout 03USER cat <<04NETDEVICE > config/chroot_local-includes/usr/bin/pelican_setup_netdevice #!/bin/bash # modification of netcardconfig from Knoppix. 
PATH="/bin:/sbin:/usr/bin:/usr/sbin" export PATH DIALOG="dialog" [ "\`id -u\`" != "0" ] && exec sudo "\$0" "\$@" TMP="/tmp/netconfig.tmp\$\$" # Define default variables PELICAN_NETWORK="10.11.12" FW_SERVICES_TCP="22" FW_SERVICES_UDP="" FW_ICMP_ECHO="YES" FW_NAT="YES" # petry: Load the configuration file if it exists, potentially override variables. if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi bailout(){ rm -f "\$TMP" exit \$1 } TITLE="Pelican Setup" MESSAGE0="No supported network cards found." MESSAGE1="Please select the network device that connects to the cluster." NETDEVICES="\$(cat /proc/net/dev | awk -F: '/eth.:|br.:|tr.:|wlan.:/{print \$1}')" if [ -z "\$NETDEVICES" ]; then \$DIALOG --title "\$TITLE" --msgbox "\$MESSAGE0" 5 30 bailout fi count="\$(echo "\$NETDEVICES" | wc -w)" if [ "\$count" -gt 1 ]; then DEVICELIST="" for DEVICE in \$NETDEVICES; do DEVICELIST="\$DEVICELIST \${DEVICE} network_device_\${DEVICE}"; done rm -f "\$TMP" echo "Hints about your net devices" dmesg|grep eth # petry: Only prompt if configuration variable not defined. if [ -z "\$CLUSTER_NETWORK" ]; then rm -f "\$TMP" \$DIALOG --menu "\$MESSAGE1" 18 45 12 \$DEVICELIST 2>"\$TMP" || bailout read DV <"\$TMP" ; rm -f "\$TMP" fi else # Remove additional spaces DV="\$(echo \$NETDEVICES)" fi IP="\$PELICAN_NETWORK.1" NM="255.255.255.0 " BC="\$PELICAN_NETWORK.255" # petry: Override device if configuration variable defined. if [ ! -z "\$CLUSTER_NETWORK" ]; then if ! echo \$CLUSTER_NETWORK | grep -q "eth" ; then MAC_ADDRESS=\$CLUSTER_NETWORK CLUSTER_NETWORK=\$(/sbin/ifconfig -a | grep "\$MAC_ADDRESS" | cut -f 1 --delimiter=" ") if [ -z "\$CLUSTER_NETWORK" ]; then echo "CLUSTER_NETWORK variable \$MAC_ADDRESS is not a valid device or MAC address." bailout fi fi DV=\$CLUSTER_NETWORK fi ifdown \$DV sleep 4 CMD="ifconfig \$DV \$IP netmask \$NM broadcast \$BC up" \$CMD sleep 4 # petry: configure firewall if [ ! 
"\$FIREWALL" = "YES" ]; then bailout fi # put our variables in form suitable for debconf if [ "\$FW_ICMP_ECHO" = "NO" ]; then FW_ICMP_ECHO="false" else FW_ICMP_ECHO="true" fi if [ "\$FW_NAT" = "NO" ]; then FW_NAT="false" else FW_NAT="true" fi # Get external net device list if not provided if [ -z "\$FW_CONFIG_EXT_IF" ]; then for DEVICE in \$NETDEVICES; do if [ ! "\$DEVICE" = "\$DV" ]; then FW_CONFIG_EXT_IF="\$FW_CONFIG_EXT_IF \$DEVICE" fi done fi if [ -z "\$FW_CONFIG_EXT_IF" ]; then echo "No external ethernet devices found. Not starting firewall." bailout fi # Use debconf to configure the firewall. (requires debconf-utils) # We need to remove old configuration file first for this to work it seems. rm -f /etc/arno-iptables-firewall/debconf.cfg debconf-set-selections < config/chroot_local-includes/usr/bin/pelican_terminalserver #!/bin/sh PATH="/bin:/sbin:/usr/bin:/usr/sbin" export PATH DIALOG="dialog" # petry : define variable defaults PELICAN_NETWORK="10.11.12" MAXNODES="100" # petry : Load configuration file (potentially override defaults) if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi [ "\`id -u\`" != "0" ] && exec sudo "\$0" "\$@" bailout(){ exit \$1 } # petry: added read-ethers to allow static IP cat < /etc/dnsmasq.conf dhcp-range=\$PELICAN_NETWORK.2,\$PELICAN_NETWORK.\$MAXNODES,255.255.255.0,12h dhcp-boot=pxelinux.0,pelican,\$PELICAN_NETWORK.1 read-ethers DNSMASQ # petry: create /etc/ethers if NODE_INFO defined. # just put empty file if it isn't. touch /etc/ethers if [ ! -z "\$NODE_INFO" ]; then # first make list of reserved IPs RESERVED_HOSTS="" for i in \$NODE_INFO; do NODEHOST="" if echo \$i | grep -q "i"; then NODEHOST=\$(echo \$i | sed 's/[^i]*i0*\([0-9]*\).*/\1/') fi if [ ! 
-z "\$NODEHOST" ]; then RESERVED_HOSTS="\$RESERVED_HOSTS \$NODEHOST " fi done # create /etc/ethers rm -f /etc/ethers HOST=1 for i in \$NODE_INFO; do NODEHOST="" if echo \$i | grep -q "i"; then NODEHOST=\$(echo \$i | sed 's/[^i]*i0*\([0-9]*\).*/\1/') fi if [ -z "\$NODEHOST" ]; then HOST=\$((HOST+1)) while echo "\$RESERVED_HOSTS" | grep -q " \$HOST "; do HOST=\$((HOST+1)) done NODEHOST=\$HOST fi NODEIP="\$PELICAN_NETWORK.\$NODEHOST" NODEHWADDR=\$(echo \$i | cut -b 1-17) echo "\$NODEHWADDR \$NODEIP" >> /etc/ethers done fi cat < /etc/xinetd.d/tftp-hpa service tftp { disable = no id = chargen-dgram socket_type = dgram protocol = udp user = root wait = yes server = /usr/sbin/in.tftpd server_args = -s /var/lib/tftpboot/ } TFTP cat < /etc/default/tftpd-hpa TFTP_USERNAME="tftp" TFTP_DIRECTORY="/var/lib/tftpboot" TFTP_ADDRESS="0.0.0.0:69" TFTP_OPTIONS="--secure" TFTP2 # set up PXE service install -d /var/lib/tftpboot/pxelinux.cfg cp /usr/lib/syslinux/pxelinux.0 /var/lib/tftpboot/ cp /boot/vmlinuz-\`uname -r\` /var/lib/tftpboot/ cat << PXECONFIG > /var/lib/tftpboot/pxelinux.cfg/default DEFAULT linux LABEL linux KERNEL vmlinuz-\`uname -r\` APPEND initrd=initrd.img nfsroot=\$PELICAN_NETWORK.1:/live/image ip=dhcp rw noautologin noxautologin union=aufs netboot=nfs boot=live PXECONFIG # generate /etc/exports cat < /etc/exports /live/image *(ro,async,no_subtree_check,no_root_squash,fsid=12345) /home \$PELICAN_NETWORK.0/255.255.255.0(rw,no_root_squash,async,no_subtree_check,fsid=54321) EXPORTS #-------------------------------------------------- # # this is needed for sid, as of Sept 2009 # cat < /etc/default/portmap # OPTIONS= # PORTMAP #-------------------------------------------------- # start services # petry: Don't prompt if config variable set to override it if [ ! "\$NETBOOT_PROMPT" = "NO" ]; then TITLE="Start Pelican HPC netboot services" MESSAGE="We now get ready to set up the cluster by starting services that will allow the compute nodes to netboot. 
IMPORTANT: do not proceed if your cluster is on an existing network, or PelicanHPC's dhcp server may conflict with a running dhcp server. Continue?" \$DIALOG --title "\$TITLE" --yesno "\$MESSAGE" 15 90 || bailout fi sync /etc/init.d/portmap restart /etc/init.d/xinetd restart /etc/init.d/tftpd-hpa restart /etc/init.d/dnsmasq restart /etc/init.d/nfs-kernel-server restart sleep 5 bailout 05TERMINALSERVER cat <<06HPC > config/chroot_local-includes/usr/bin/pelican_restart_hpc #!/bin/sh # petry: script needs to run as user 'user' for ssh to nodes to work. if [ ! "\$(whoami)" = 'user' ]; then su user -c "pelican_restart_hpc" exit 0 fi PKTMP="/home/user/tmp" PATH="/bin:/sbin:/usr/bin:/usr/sbin" export PATH DIALOG="dialog" # petry : set flag indicating we're running this script (used by # SHUTDOWN_IF_NOT_SETUP config variable) touch /home/user/.running_pelican_restart_hpc # petry : default variable values PELICAN_NETWORK="10.11.12" GANGLIA_PROMPT="NO" GANGLIA="YES" # petry : load configuration file (override default values potentially) if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi FRONTENDIP="\$PELICAN_NETWORK.1" # get cluster ethernet device. Need to escape periods for sed FRONTENDIP_FOR_SED=\$(echo "\$FRONTENDIP" | sed 's/\./\\\\\./g') CLUSTER_DEV=\$(/sbin/ifconfig -a | tr -d '\n' | sed "s/.*\(eth[^ ]*\).*\$FRONTENDIP_FOR_SED.*/\1/") bailout(){ # petry : remove flag indicating we are running this script rm -f /home/user/.running_pelican_restart_hpc exit 0 } # check which nodes are up checknodes(){ rm -f \$PKTMP/bhosts sudo fping -a -q -r0 -f /home/user/fpinghosts > \$PKTMP/bhosts } retry(){ checknodes NNODES="\$(grep -c "" \$PKTMP/bhosts)" MESSAGENODES="\nGo turn on your compute nodes now. \n\nAt the moment \$NNODES compute nodes (not counting this frontend node) are available. \n\nClick no to rescan the available nodes. Click yes when the desired number of nodes are available. 
You might want to wait a bit if some nodes are still finishing booting up." \$DIALOG --title "\$TITLE" --defaultno --yesno "\$MESSAGENODES" 20 50 || retry } trap bailout 1 2 3 15 # petry: override restarting MPI warning if configured in config file if [ ! "\$MPI_WARNING_PROMPT" = "NO" ]; then TITLE="Restart Pelican HPC" MESSAGE="\nTime to bring the compute nodes into the cluster. If you are resizing a running cluster, be aware that continuing will interrupt any running MPI jobs. Continue?" \$DIALOG --title "\$TITLE" --yesno "\$MESSAGE" 15 90 || bailout fi # petry: Send WOL signals to nodes if configured in pelican config file. if [ "\$WOL_NODES" = "YES" ]; then if [ ! "\$WOL_PROMPT" = "NO" ]; then TITLE="Restart Pelican HPC" MESSAGE="\nComputers for your cluster that have been configured in your pelican config file may be sent signals over the local area network to start them. Do you want to send wake-on-lan signals to machines configured in your pelican config file?" if \$DIALOG --title "\$TITLE" --yesno "\$MESSAGE" 15 90 ; then WOL="YES" else WOL="NO" fi else WOL="YES" fi if [ "\$WOL" = "YES" ]; then /usr/bin/pelican_nodes start fi fi # petry: Only prompt to turn on nodes if not overridden in config file if [ ! "\$DETECT_NODES_PROMPT" = "NO" ]; then retry else checknodes fi # master must be last in the list # petry: allow override in config file whether to include frontend if [ ! "\$FRONTEND_IN_MPI" = "NO" ]; then echo "\$FRONTENDIP" | cat >> \$PKTMP/bhosts fi # # Configure /etc/hosts on frontend for proper dns behaviour. HOSTIPS=\$(cat \$PKTMP/bhosts | tr "\n" " ") cat >> /tmp/hosts\$\$ <> /tmp/hosts\$\$ fi done sudo mv /tmp/hosts\$\$ /etc/hosts # Now refresh dnsmasq sudo killall -HUP dnsmasq # # petry: add slots information to bhosts TMPBHOSTS="/tmp/bhosts\$\$" for HOSTIP in \$HOSTIPS; do SLOTS=\$NUM_SLOTS ETHERS_ENTRY=\$(grep -s \$HOSTIP /etc/ethers) if [ ! 
-z "\$ETHERS_ENTRY" ]; then MAC=\$(echo "\$ETHERS_ENTRY" | cut --delimiter=" " -f 1) NODE_INFO_SLOTS=\$(echo "\$NODE_INFO" | grep -o "\$MAC[^ ]*" | grep 's' | sed 's/[^s]*s0*\([0-9]*\).*/\1/') if [ ! -z "\$NODE_INFO_SLOTS" ]; then SLOTS="\$NODE_INFO_SLOTS" fi fi if [ -z "\$SLOTS" ]; then echo "\$HOSTIP" >> /tmp/bhosts\$\$ elif [ "\$SLOTS" = "0" ]; then # no entry so do nothing true else echo "\$HOSTIP slots=\$SLOTS" >> /tmp/bhosts\$\$ fi done mv /tmp/bhosts\$\$ \$PKTMP/bhosts # petry: start ganglia, prompt if requested in config file. if [ "\$GANGLIA_PROMPT" = "YES" ]; then STARTGANGLIA="NO" TITLE="Restart Pelican HPC" MESSAGE="\nThe ganglia cluster monitoring software starts a web server on the frontend which you can point your browser at either locally (localhost) or remotely to monitor your cluster. Do you want to start ganglia (enable the necessary daemons on the frontend and nodes)?" if \$DIALOG --title "\$TITLE" --yesno "\$MESSAGE" 15 90 ; then STARTGANGLIA="YES" fi elif [ "\$GANGLIA" = "YES" ]; then STARTGANGLIA="YES" else STARTGANGLIA="NO" fi if [ "\$STARTGANGLIA" = "YES" ]; then # petry: We have to configure the ganglia webfrontend, ganglia-monitor and # gmetad on the frontend, and ganglia-monitor on all the nodes. # stop all local daemons sudo /etc/init.d/apache2 stop sudo /etc/init.d/ganglia-monitor stop sudo /etc/init.d/gmetad stop # setup webfrontend and apache sudo mkdir -p /var/www/html sudo rm -rf /var/www/html/ganglia sudo cp -rp /usr/share/ganglia-webfrontend /var/www/html/ganglia APACHE_SITES="/etc/apache2/sites-available/default" # check we haven't already configured apache if ! grep -q "ganglia" "\$APACHE_SITES"; then sed 's/\/var\/www\//\/var\/www\/html\/ganglia\//g' "\$APACHE_SITES" > /tmp/apache_sites\$\$ sudo mv /tmp/apache_sites\$\$ "\$APACHE_SITES" fi # configure frontend gmond.conf file. Strip out the lines we don't want (any # default configuration) and add the ones we do. 
grep "^#" /etc/gmond.conf > /tmp/gmond.conf\$\$ cat >> /tmp/gmond.conf\$\$ < /tmp/gmetad.conf\$\$ cat >> /tmp/gmetad.conf\$\$ <> "\$CONFIGURE_NODE_GMOND" <> /etc/gmond.conf fi !NODE_GMOND_CONF! chmod a+x "\$CONFIGURE_NODE_GMOND" for HOSTIP in \$HOSTIPS; do if [ ! "\$HOSTIP" = "\$FRONTENDIP" ]; then ssh "\$HOSTIP" sudo /etc/init.d/ganglia-monitor stop ssh "\$HOSTIP" sudo "\$CONFIGURE_NODE_GMOND" ssh "\$HOSTIP" sudo /etc/init.d/ganglia-monitor start fi done rm -f "\$CONFIGURE_NODE_GMOND" # restart frontend services sudo /etc/init.d/ganglia-monitor start sudo /etc/init.d/gmetad start sudo /etc/init.d/apache2 start fi # ganglia code finished. # display success message NNODES="\$(grep -c "" \$PKTMP/bhosts)" # final report # petry: override final report if configured in config file if [ ! "\$FINAL_REPORT_PROMPT" = "NO" ]; then if [ ! "\$FLOPS_TEST" = "NO" ]; then SUCCESS="\nYour cluster of \$NNODES nodes is (probably) ready to use, a rudimentary test of the cluster will follow. If there was a problem, just re-run the script.\nThe nodes in the cluster are listed in the file ~/tmp/bhosts. If you add or remove compute nodes, re-run this script (pelican_restart_hpc) whenever you like." else SUCCESS="\nYour cluster of \$NNODES nodes is (probably) ready to use. The nodes in the cluster are listed in the file ~/tmp/bhosts. If you add or remove compute nodes, re-run this script (pelican_restart_hpc) whenever you like." fi \$DIALOG --title "\$TITLE" --msgbox "\$SUCCESS" 15 50 fi echo -n "" # flops.f simple test of cluster if [ ! "\$FLOPS_TEST" = "NO" ]; then if [ -e /home/user/flops.f ]; then cd /home/user mpif77 -o flops flops.f # petry: removed -np \$NNODES from the following - it will use one slot # per host if slots are not defined anyway, and it will use the number of # slots found if they are defined which is desirable. 
mpirun --hostfile /home/user/tmp/bhosts /home/user/flops fi fi # petry: add option for final local script if [ "\$RUN_FRONTEND_LOCAL" = "YES" ]; then if [ -e /home/user/pelican_frontend_local ]; then /home/user/pelican_frontend_local fi fi bailout 06HPC cat <<07NODES > config/chroot_local-includes/usr/bin/pelican_nodes #!/bin/sh # petry: script takes a single argument, start or stop. If "start" it # will WOL any machines configured with NODE_INFO in the # pelican_config file. If given the "stop" argument it will # shut down all active nodes on the cluster except the # frontend. This script can be configured to run automatically # when the frontend shuts down - see pelican_config entry # AUTO_NODE_SHUTDOWN . # # petry: script needs to run as user 'user' for ssh to nodes to work. if [ ! "\$(whoami)" = 'user' ]; then su user -c "pelican_nodes \$1" exit 0 fi # petry: define default values here PELICAN_NETWORK="10.11.12" NODE_INFO="" WOL_NODE_DELAY=5 WOL_FINAL_DELAY=10 NODE_SHUTDOWN_DELAY="2" # petry: load config file (potentially override defaults) if [ -e /home/user/pelican_config ]; then . /home/user/pelican_config fi if [ "\$1" = "start" ]; then FRONTENDIP="\$PELICAN_NETWORK.1" # get cluster ethernet device. Need to escape periods for sed FRONTENDIP_FOR_SED=\$(echo "\$FRONTENDIP" | sed 's/\./\\\\\./g') CLUSTER_DEV=\$(/sbin/ifconfig -a | tr -d '\n' | sed "s/.*\(eth[^ ]*\).*\$FRONTENDIP_FOR_SED.*/\1/") for HOST in \$NODE_INFO; do HOST_MAC=\$(echo \$HOST | cut -b 1-17) sudo etherwake -i "\$CLUSTER_DEV" "\$HOST_MAC" sleep "\$WOL_NODE_DELAY" done sleep "\$WOL_FINAL_DELAY" elif [ "\$1" = "stop" ]; then PKTMP="/home/user/tmp" for HOSTIP in \$(sed 's/\([^ ]*\).*/\1/' "\$PKTMP/bhosts"); do if [ ! 
"\$HOSTIP" = "\$PELICAN_NETWORK.1" ]; then ssh "\$HOSTIP" "sudo shutdown -h now" sleep "\$NODE_SHUTDOWN_DELAY" fi done exit 0 else echo "usage is pelican_nodes start|stop" fi 07NODES cat <<08CONFIG > config/chroot_local-includes/etc/skel/pelican_config #!/bin/sh # pelican_config # # This file defines a series of variables that modify the behaviour of # the live pelican hpc cluster on subsequent reboots. Its purpose is # to allow a less-interactive booting of pelicanHPC as well as to add # useful features that require minor configuration. # # To make changes to this file permanent requires that the cluster # have a permanent home directory. This requires formatting a # partition (hard drive, usb stick, etc.) with a filesystem (ext2/3, # etc.) and selecting it rather than ram1 at boot. See the # documentation in Desktop/PelicanTutorial.pdf # # If one desires that the pelican cluster boot with no interaction at # all it is necessary that the home partition (containing this file) # be automatically mounted. This is done by giving the partition the # label "PELHOME" which pelicanHPC looks for upon boot. How to label a # partition depends on the filesystem. For a linux ext2/3 filesystem # this can be done with the command # # sudo /sbin/e2label "PELHOME" # # where is the partition (eg. /dev/sda1 etc.) For an msdos # (vfat) filesystem the mtools package can be used. From Linux, one # first has to map the device to a letter by editing the # /etc/mtools.conf file. Then if, for instance, the drive letter # chosen is c, one could do # # sudo mlabel c:"PELHOME" # # Note that using a vfat partition for home has not been tested. # # By default this file configures absolutely nothing (changes no # behaviour during boot) unless edited, so it can be safely left in a # permanent home partition. As well this config file need not be kept # since all default configuration is within PelicanHPC. 
# # ------------------------------------------------------------------------ # # Each config entry is described, followed by a sample configuration # (usually changing it from the default), and then (optionally) a # usage note. # # Give prompt regarding whether detected home filesystem should be # mounted. ( Default gives prompt. ) If the prompt is not given and a # home filesystem is detected IT WILL BE MOUNTED, thereby allowing a # headless boot. # AUTOMOUNT_HOME_PROMPT="NO" # Usage note: Due to how this entry is read from the config file, it # must appear exactly as it is (without the #) if it is to be used. # Other entries in this config file are read into the scripts as usual # shell commands and are more forgiving. That said, where "YES" or # "NO" are required, only those values are interpreted correctly. # Values of "y", "Y", "n", "N", "yes" or "no" WILL NOT be interpreted # correctly and will result in the default behaviour being selected. # # The network address for your cluster. Defaults to 10.11.12 if this # is undefined. You likely only need to change this if you have # another frontend ethernet device already with a 10.x.x.x # IP address. The following should work in that case. # PELICAN_NETWORK="192.168.0" # # The maximum number of machines in your cluster. Defaults to 100 if # undefined. Node IPs will range from \$PELICAN_NETWORK.2 to # \$PELICAN_NETWORK.\$MAXNODES . Maximum value is 255. If this number # is unnecessarily large time is wasted pinging during setup. # MAXNODES=100 # # Local permanent storage (scratch space) on either the frontend or a # node can be mounted by labeling a partition "PELSCRATCH" (as # described above for permanent home detection). Pelican HPC will # detect and mount such partitions if the following are set (default # to no detection.) The default scratch mountpoints are # /usr/local/scratch if not specified in the last two variables. 
The # difference between this local space and the home directory is that # the latter is shared via NFS to all the nodes. # DETECT_FRONTEND_SCRATCH="YES" # DETECT_NODE_SCRATCH="YES" # FRONTEND_SCRATCH_MOUNT="/usr/local/scratch" # NODE_SCRATCH_MOUNT="/usr/local/scratch" # # Give prompt whether to copy example files or not. (Default gives # prompt.) If no prompt is given, no files are copied. # COPY_FILE_PROMPT="NO" # # Give prompt whether to request a password. (Default gives prompt.) # If set to NO the existing password in /home/user/pw is used. # PASSWORD_PROMPT="NO" # # Run executable /home/user/pelican_frontend_boot (if it exists) on # the frontend after initial boot. Defaults to not run such an executable. # If RUN_PELICAN_SETUP is set to yes, the cluster setup is run after # the pelican_frontend_boot executable. Remember to make any script # executable. (chmod a+x) This executable is run with root privilege. # See also the RUN_FRONTEND_LOCAL entry below. # RUN_FRONTEND_BOOT="YES" # # Run executable /home/user/pelican_node_boot (if it exists) on each # node after initial boot. Defaults to not run such an executable. # Remember to make any script executable. (chmod a+x) This executable # is run with root privilege. # RUN_NODE_BOOT="YES" # # Have node sound a beep upon completion of boot. Default is not to # beep. Value of NODE_BEEP can be any arguments suitable for the "beep" # command. # NODE_BEEP="-r 2 -d 10" # # Automatically run the pelican_setup routine after booting. That # routine is what sets up the cluster proper (starts DHCP, searches # for nodes, etc.) (The default is for the user to run pelican_setup # manually after the frontend boots.) # RUN_PELICAN_SETUP="YES" # # Network device connected to cluster. If this is defined then the # user will not be prompted to choose the network device should # multiple devices be detected. This can either be the device name # (eth0, eth1, etc.) 
or it can be the MAC hardware address of the # ethernet card attached to the cluster network exactly as it appears # when running # /sbin/ifconfig -a # (The eth? device name will be found from the MAC address using that # command.) The purpose of allowing a MAC Hardware address is to avoid # problems with device names not being consistently mapped to hardware # at boot. # CLUSTER_NETWORK="eth1" # # Give prompt whether to start the services required for netbooting # (dhcp, nfs exporting, etc.) Default is to prompt. If no prompt is # given, the netboot services are automatically started. # NETBOOT_PROMPT="NO" # # Start firewall. As part of networking services pelicanHPC can start # a firewall on the frontend that will restrict access to the frontend # on any external network devices while allowing unrestricted # communication on the internal (cluster) network. The next few # variables (FW_*) allow modification of the firewall behaviour. # Default is to NOT start the firewall. # FIREWALL="YES" # # External network devices to be firewalled. External network # interface(s) can be specified here which the firewall should block. # Multiple interfaces should be space separated. If not specified the # default is to firewall all network devices autodetected which are # not the cluster network. The following is just a sample for # formatting. # FW_CONFIG_EXT_IF="eth0 eth2 wlan0" # Note: The internal (cluster) network which is trusted is # autodetected. # TCP ports to leave open (not block with firewall) on external # interfaces. Default is to only allow secure shell (port 22). # Multiple ports must be space separated. The following would allow # external connection to the webserver (port 80) as well so that the # cluster status could be monitored by anyone externally. 
# FW_SERVICES_TCP="22 80" # Note: With the default configuration (just ssh allowed) one may # still use ssh -X to log into the frontend remotely followed by # starting a web browser on that machine to look at the web frontend # of the ganglia monitoring system. # # UDP ports to leave open (not block with the firewall) on external # interfaces. Default is to block all UDP ports. Multiple ports must # be space separated. # FW_SERVICES_UDP="" # # Allow pinging of external interfaces. By default the firewall will # allow pings to the external interface (with some restrictions to # prevent certain types of attacks). The following will disable any # ping response. # FW_ICMP_ECHO="NO" # # Allow nodes access to the Internet. By default IP masquerading # (NAT) is turned on by the firewall so that nodes can connect to the # (external) Internet. The following will disable this behaviour. # FW_NAT="NO" # # Give prompt warning that restarting the cluster will interrupt # running MPI jobs. Default is to give the prompt. # MPI_WARNING_PROMPT="NO" # # Automatically wake-on-lan (WOL) cluster nodes with MAC hardware # addresses listed in NODE_INFO (below) when setting up the cluster # (i.e. so you won't have to physically turn them on.) Nodes will be # booted in the order they appear there. Default is to not send WOL # signals to the nodes. # WOL_NODES="YES" # Note: You will have to configure the BIOS of each node to allow WOL # signals to work. A typical BIOS entry will be under "power" # configuration and it will say "power on from S5 by PME#" (which will # need to be enabled). Such computers can be sent a wake-on-lan # signal which will autostart them provided they have been previously # "soft-powered down" so they are in the ACPI D3-warm state. A normal # shutdown puts them in this state. Complete power loss (accidental # or otherwise) to a node will put them in a state where manual # booting will be required (WOL will fail.) See SHUTDOWN_IF_NOT_SETUP. 
# # Give prompt before sending WOL signals to cluster nodes. Default is # to give prompt. If no prompt is given, the nodes will be booted # automatically. This entry only matters if WOL_NODES is defined. # WOL_PROMPT="NO" # # Delay time interval between booting each node when sending WOL # signals. Defaults to 5 seconds if undefined. Time format must be # compatible with the "sleep" command. Only matters if WOL_NODES is # defined. # WOL_NODE_DELAY="5" # # Delay time after all nodes have been sent the wake-on-lan signal, # but before continuing setting up the final cluster. Defaults to 10 # seconds if undefined. Time format must be compatible with the # "sleep" command. Only matters if WOL_NODES is defined. Note if the # DETECT_NODES_PROMPT variable is YES (see next variable) then this # delay can be set to zero. # WOL_FINAL_DELAY="10" # # Once the pelican_setup script has been run, the terminal server # continues to operate so subsequent node boots will boot over the # LAN. (One can then rerun pelican_restart_hpc and pick them up). # This is the default behaviour. If the following variable is set to # YES, nodes will still boot over the LAN outside of the # pelican_setup/pelican_restart_hpc scripts running, however they will # then proceed to shut themselves down once the boot is complete. # SHUTDOWN_IF_NOT_SETUP="YES" # Note: The purpose of this option is to deal with power outages. As # mentioned under the WOL_NODES option, a power outage will place a # node in a state which will not typically allow a WOL signal to work. # However nodes can be configured in their BIOS to automatically boot # when their power is restored. Assuming the node BIOS is configured # in that way and the frontend is on (perhaps maintained by a UPS), # nodes will boot and promptly shut down, but now they will be in a # state suitable for a wake-on-lan signal to work again. 
(Since any # work that was being done by the cluster before the outage would have # been interrupted, having the nodes off - but WOL bootable - is # perhaps desirable.) Recall that the terminal server must already be # started for this to work. This means that pelican_setup must have # been run (or at least the pelican_setup_netdevice and # pelican_terminalserver subscripts which it calls). If the frontend # itself is auto-booted after a power interruption this could be # achieved using RUN_PELICAN_SETUP or alternatively by calling its # subscripts directly using the RUN_FRONTEND_BOOT option. # # Prompt to physically turn on nodes to assemble the cluster. # Defaults to prompt. If set to "NO", then LAN booting will need to be # configured. # DETECT_NODES_PROMPT="NO" # # The default number of slots each node has for purposes of MPI. A # slot essentially corresponds to the number of parallel processes you # want running on a node and is likely the number of cores available # (or perhaps more if your processor is also hyperthreaded.) If # NUM_SLOTS is defined then the argument "slots=\$NUM_SLOTS" will be # automatically added to each entry in ~/tmp/bhosts. Default is to # not have any argument added in ~/tmp/bhosts. Note if your # environment is more heterogeneous and you require different slots # for each machine, then this can be configured individually in # NODE_INFO below. In that case only machines that do not have a # slots configuration there will be given \$NUM_SLOTS slots. # NUM_SLOTS="1" # Note ~/tmp/bhosts is the mpi "hostfile" which the user will # typically pass to the mpi executable with the --hostfile option. By # default all this will contain is all active IP's of your cluster # (including the frontend) and mpi executables will assume one slot # per machine. # # Whether to include the frontend machine in your MPI calculations. # If you are running the frontend as a virtual machine, for instance, # you may not want to include it.
Default is to include the frontend # in the calculation. # FRONTEND_IN_MPI="NO" # # Whether to start the ganglia cluster monitoring software. Ganglia # is a convenient cluster monitoring tool which can display cluster # activity at a glance. If it is started one simply has to point a # browser at the frontend's IP (or just use "localhost" if on the # frontend) and it will display the cluster statistics. If the # GANGLIA variable is set to "NO" then ganglia will not be started on # boot. Default is to start Ganglia. Note that since IP addresses # are randomly assigned to nodes it is hard to identify them unless # NODE_INFO is defined below. (Since the node names are given by # pel\$HOST where \$HOST is the last byte of the IP address, random IPs # are undesirable.) # GANGLIA="NO" # # Whether to display a prompt before starting ganglia. If # GANGLIA_PROMPT is set to "YES" then the GANGLIA variable is not used # to determine whether GANGLIA is started, rather the user is prompted # whether to start ganglia. Default is not to prompt. # GANGLIA_PROMPT="YES" # # Give final report message. Default is to give the message. # FINAL_REPORT_PROMPT="NO" # # Compile and run the flops executable on the cluster as a final test. # Default is to run the test. # FLOPS_TEST="NO" # # Run executable /home/user/pelican_frontend_local (if it exists) on # the frontend after cluster initialization is complete. Default is # to not run such an executable. Remember to make any script # executable (chmod a+x). This executable is run as the user "user" # so sudo commands are required for any script commands requiring root # privilege. Also note this executable will be run every time # pelican_restart_hpc is run manually. See the RUN_FRONTEND_BOOT # entry as well. # RUN_FRONTEND_LOCAL="YES" # # Automatically shutdown cluster nodes when frontend is shutdown.
If # set to YES then shutting down (or rebooting) the frontend will # automatically send a shutdown command to each configured node as # part of the frontend's shutdown/reboot procedure. Default is to not # send shutdown commands. Note you can run the node shutdown manually # by running # pelican_nodes stop # Similarly you can WOL any configured nodes with the command # pelican_nodes start # (The latter command is run within pelican_restart_hpc.) # AUTO_NODE_SHUTDOWN="YES" # # Delay between individual node shutdowns during auto shutdown. # Default is a two second delay. Any value valid by the "sleep" # command works. This variable is irrelevant unless # AUTO_NODE_SHUTDOWN is configured or the "pelican_nodes stop" # script is run. # NODE_SHUTDOWN_DELAY=2 # # Configure hardware information for the nodes using the following # NODE_INFO variable. In its most basic form NODE_INFO lists the MAC # addresses of each of the nodes. This does the following: # 1) During boot, rather than being assigned random IP addresses, the # cluster will assign addresses sequentially with the first # available address (\$PELICAN_NETWORK.2) being assigned to the # first MAC address listed, \$PELICAN_NETWORK.3 to the second MAC # address listed, etc. (Note that \$PELICAN_NETWORK.1 is reserved # for the frontend.) These are essentially static assignments and # it doesn't matter in what order the nodes are actually turned on. # 2) If WOL_NODES is configured, machines listed here will be sent # wake-on-lan (WOL) signals in the order that they appear here. # Further configuration is possible by adding strings to the end of the # MAC hardware addresses. If one adds: # i<n> where <n> is from 2 to 255 then the machine will be given # the IP address \$PELICAN_NETWORK.<n>, overriding the default # sequential IP assignment described above. # s<m> where <m> is the number of slots available for MPI, then # this will put a "slots=<m>" entry in the ~/tmp/bhosts file # overriding the NUM_SLOTS variable, if it is defined.
# If <m>=0
# no entry at all will be put in ~/tmp/bhosts for the machine, however
# it will still be set up for DHCP and WOL. (This allows one to
# configure a file or print server that is on your cluster switch
# but is not part of the mpi computations.)
# A complicated sample NODE_INFO definition follows:
# NODE_INFO="11:11:11:11:11:11s4 \\
#            22:22:22:22:22:22i101 \\
#            33:33:33:33:33:33 \\
#            . . . \\
#            . . . \\
#            99:99:99:99:99:99i2s2 \\
#            AA:AA:AA:AA:AA:AAi102s0"
# Assuming PELICAN_NETWORK="10.11.12", in the above configuration the
# machines with MAC addresses 22:..., 99:..., and AA:... have their ip
# addresses assigned as 10.11.12.101, 10.11.12.2, and 10.11.12.102
# respectively at boot. The remainder are assigned sequentially so
# 11:... is given 10.11.12.3 (since 10.11.12.1 is the frontend and
# 10.11.12.2 is taken) and 33:... is given 10.11.12.4 . Machines
# 11:... and 99:... are given slots entries of 4 and 2 respectively in
# ~/tmp/bhosts. Machine AA:... is given no entry at all in
# ~/tmp/bhosts (but is still configured for DHCP and potentially WOL),
# while the remainder are all given entries in ~/tmp/bhosts with
# either \$NUM_SLOTS slots if that variable is defined, or no slots
# configured at all if it is not. Note it is safe to put an entry for
# the frontend ( like <MAC>i1s<m> ) if you want to specify
# the frontend has <m> slots.
#
# Obviously you will have to find the real MAC addresses of your
# nodes. This can be done by booting and then using the
# /sbin/ifconfig command when logged into a node. (Systematic node
# shutdown or the "beep" command might be useful in physically mapping
# the nodes.)
08CONFIG

######## END OF PELICAN SETUP SCRIPTS #############

######## POST BOOT SCRIPT ####################
# Generate the live-bottom hook "99script". The hook itself uses a
# nested heredoc (RC_LOCAL) to write an /etc/rc.local that calls
# pelican_boot_setup, then marks that rc.local executable.
# The nested "cat <<RC_LOCAL > /etc/rc.local" line must keep its heredoc
# operator: without it the hook would read rc.local instead of writing
# it, and the following lines would be run as commands.
install -d config/chroot_local-includes/usr/share/initramfs-tools/scripts/live-bottom/
cat <<99START > config/chroot_local-includes/usr/share/initramfs-tools/scripts/live-bottom/99script
#!/bin/sh
cat <<RC_LOCAL > /etc/rc.local
#! /bin/sh
pelican_boot_setup
RC_LOCAL
chmod a+x /etc/rc.local
99START
chmod +x config/chroot_local-includes/usr/share/initramfs-tools/scripts/live-bottom/99script

# make the scripts in /usr/bin executable
chmod +x config/chroot_local-includes/usr/bin/*

# build the ISO image; all build output is also kept in build.log
# nice ionice -c2 lb build --debug 2>&1 | tee build.log
lb build --debug 2>&1 | tee build.log

# safeguard against crashes: try to release mounts a failed build may
# have left behind (these report an error when nothing is mounted)
umount proc-live
umount sysfs-live
umount devpts-live
sync
date

# move the resulting image next to the build directory under its final name
mv *.iso ../"$IMAGENAME"