#!/usr/local/bin/cbsd
#v10.0.6
# Command description: no mandatory arguments, the optional ones are listed in
# MYOPTARG. ADDHELP documents every optional argument and the examples use only
# argument names that actually appear in MYOPTARG.
MYARG=""
MYOPTARG="node nodelist cmdfile netgroup quiet tty tryoffline"
MYDESC="Execute remote command using ssh on the node"
ADDHELP="node - comma-separated host list, eg. node=s1,s2\n\
nodelist - path to ascii file with node list\n\
netgroup - netgroup name to take the host list from\n\
cmdfile - path to file with commands to execute on the node\n\
quiet=1 - execute command quietly\n\
tty=1 - force tty allocation, tty=0 - force no tty\n\
tryoffline=1 - try to fetch inventory when node is offline\n\
EXAMPLES:\n\
cbsd rexe node=s1.my.domain ls -la /tmp \n\
cbsd rexe nodelist=/tmp/linuxhosts cmdfile=/root/bin/sh.sh \n\
cbsd rexe node=s1,s2,s3 date \n"

# Pull in the CBSD helper libraries. The paths come from variables that are
# set outside this file (presumably by the cbsd launcher -- TODO confirm).
. ${subr}
. ${strings}
. ${nodes}

# Hand the command line to init. NOTE(review): the rest of the script reads
# ${node}, ${quiet}, ${tty}, ... as plain variables, so init presumably turns
# key=value arguments into shell variables -- confirm against subr.
init $*

# From here on an unchecked failing command terminates the shell.
set -o errexit

# Debug verbosity; the checks in this file use ">= 1" and ">= 2", messages go to stderr.
DEBUG=0
# Set to "-q" in quiet mode further down; not passed to ssh anywhere in this file.
SSH_VERBOSE=""
# 1 = do not echo remote output (see ssh_connect).
QUIET=0
# Not referenced anywhere else in this file.
UNIQUE_HOSTS=1
# Only referenced by a commented-out ssh invocation in ssh_connect.
REMOTE_SHELL='sh'
# Parent directory for the per-run temp dir created by create_temp.
TEMP_BASE=${ftmpdir}
# Number of children to fork; overwritten with the host count in the main section.
CONCURRENT=0
# ssh_connect echoes remote output only while this is 0; nothing in this file sets it to 1.
MULTIHOST=0
# 0 selects "ssh -T" (no tty), anything else "ssh -tt" (see ssh_connect).
TTY=1


# Run ${THESCRIPT} on the node described by ${SSH_ARGS} and relay its output.
#
# Globals read:    TTY, QUIET, MULTIHOST, SSH_ARGS, THESCRIPT, sshdir, nodename
# Globals written: TTYOP ("-T" when TTY is 0, "-tt" otherwise)
# Output:          remote lines are echoed to stdout only when QUIET < 1 and
#                  MULTIHOST is 0; empty lines are dropped.
# Returns:         the exit status of ssh itself (the status of the relay loop
#                  is ignored on purpose).
ssh_connect() {
	local ret line show

	if [ "${TTY}" = 0 ]; then
		TTYOP="-T"
	else
		TTYOP="-tt"
	fi

	# The relay decision does not depend on the line content: evaluate it once.
	# Separate tests keep arbitrary remote output away from the "[" parser.
	show=0
	if [ "$QUIET" -lt 1 ] && [ "$MULTIHOST" = "0" ]; then
		show=1
	fi

	# A plain "ssh | while" pipeline only reports the status of the loop.
	# fd 3 carries the ssh exit status into the command substitution, fd 4 is
	# the real stdout for relayed lines. Both are closed for ssh itself so a
	# lingering ssh process cannot keep the substitution open.
	{
		ret=$(
			{
				{
					rc=0
					/usr/bin/ssh -oControlPath=${sshdir}/sockets/%r@%h:%p $SSH_ARGS ${TTYOP} "/usr/local/bin/cbsd -c \"update_idle ${nodename}\"; ${THESCRIPT}" 3>&- 4>&- || rc=$?
					echo "${rc}" >&3
				} | while IFS= read -r line || [ -n "${line}" ]; do
					if [ "${show}" = 1 ] && [ -n "${line}" ]; then
						printf '%s\n' "${line}" >&4
					fi
				done 3>&-
			} 3>&1
		) || true
	} 4>&1

	# No status line means the ssh side of the pipeline never completed.
	[ -n "${ret}" ] || ret=1
	return "${ret}"
}

##############################
# FUNCTIONS FOR FORKED PROCS #
##############################
# Write every host from ${HOSTLIST} (whitespace separated) to
# ${TEMP_DIR}/hostlist, one host per line, replacing any previous list.
#
# Globals read:    DEBUG, TEMP_DIR, HOSTLIST
# Globals written: HOST (loop variable, left at the last host)
# Exits with status 1 when the list file cannot be removed or written.
set_hostlist() {
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: BUILDING HOST LIST FILE $TEMP_DIR/hostlist" 1>&2
	# Quoted so that a temp path containing whitespace still removes the old list.
	/bin/rm -f "${TEMP_DIR}/hostlist" || exit 1
	for HOST in $HOSTLIST ; do
		printf '%s\n' "${HOST}" >> "${TEMP_DIR}/hostlist" || exit 1
	done
}

# Pop the first host from ${TEMP_DIR}/hostlist and print it on stdout.
# Prints nothing when the list is empty.
#
# Access is serialized with ${TEMP_DIR}/hostlist.lock: every child appends its
# number and the one whose number is on the first line owns the list until it
# removes the lock file again.
#
# Globals read:    TEMP_DIR, CHILDNUM, DEBUG
# Globals written: TOP_PID, NEXT_HOST, HOSTFILE_LEN
# Exits with status 1 when the shortened list cannot be written.
get_next_host() {
	local lockfile hostfile

	lockfile="${TEMP_DIR}/hostlist.lock"
	hostfile="${TEMP_DIR}/hostlist"

	# lock file
	while : ; do
		echo "${CHILDNUM}" >> "${lockfile}"
		# The owner may remove the lock between our append and this read; a
		# failing head must not abort the child under errexit, just retry.
		TOP_PID=$( /usr/bin/head -n 1 "${lockfile}" 2>/dev/null ) || TOP_PID=""
		[ "${TOP_PID}" = "${CHILDNUM}" ] && break
		[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: hostlist file already locked.  Sleep..." 1>&2
		sleep 1
	done
	[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: Locked hostfile." 1>&2

	# get next host
	NEXT_HOST=$( /usr/bin/head -n 1 "${hostfile}" 2>/dev/null ) || NEXT_HOST=""
	HOSTFILE_LEN=$( /usr/bin/wc -l "${hostfile}" 2>/dev/null | /usr/bin/awk '{print $1}' )

	# Nothing left (or no list at all): release the lock and print nothing.
	if [ -z "${HOSTFILE_LEN}" ] || [ "${HOSTFILE_LEN}" = 0 ]; then
		/bin/rm -f "${lockfile}"
		return
	fi

	[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: Next host: $NEXT_HOST" 1>&2

	# re-write the file without its first line
	/bin/rm -f "${hostfile}.new"
	/usr/bin/tail -n +2 "${hostfile}" > "${hostfile}.new" || exit 1
	/bin/rm -f "${hostfile}"
	/bin/mv "${hostfile}.new" "${hostfile}"

	# unlock file
	[ "$DEBUG" -ge 2 ] && echo "DEBUG[#$CHILDNUM]: Removing hostfile lock." 1>&2
	/bin/rm -f "${lockfile}"

	# return hostname
	printf '%s\n' "${NEXT_HOST}"
}

# Worker loop, meant to run as a background job: keep taking hosts from the
# shared host list and run the command on each one until the list is empty.
#
# Arguments: $1 - child number (used for the per-child files in ${TEMP_DIR})
# Globals read:    DEBUG, CONCURRENT, TEMP_DIR, TTY, tryoffline, ftmpdir, cbsduser
# Globals written: CHILDNUM, HOST, NODEDATA, SSH_ARGS, ret (plus whatever
#                  sqllist assigns: myip, myport, mykey)
# A SIGHUP makes the child exit with status 0.
run_child() {
	trap "exit 0" SIGHUP
	CHILDNUM=$1
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: FORKING CHILD #$CHILDNUM of $CONCURRENT (pid $!/$$)" 1>&2

	while : ; do
		# issue:  Cannot call get_next_host inside $() or `` because our trap won't be able to kill that.
		#     solution: avoid subshell here by directing to a file.
		/bin/rm -f "$TEMP_DIR/$CHILDNUM.next_host"
		get_next_host > "$TEMP_DIR/$CHILDNUM.next_host"
		HOST=$( /bin/cat "$TEMP_DIR/$CHILDNUM.next_host" )

		# An empty answer means the host list is exhausted.
		if [ -z "$HOST" ] ; then
			/bin/rm -f "$TEMP_DIR/$CHILDNUM.pid"
			break
		fi

		[ "$DEBUG" -ge 1 ] && echo "DEBUG[#$CHILDNUM]: CONNECT $HOST" 1>&2
		/bin/rm -f "$TEMP_DIR/$CHILDNUM.active"
		echo "$HOST" > "$TEMP_DIR/$CHILDNUM.active"
		# NOTE(review): ${HOST} is pasted into the SQL text unescaped.
		NODEDATA=$( cbsdsql nodes select ip,port,keyfile from nodelist where nodename=\"${HOST}\" )

		if [ -z "${NODEDATA}" ]; then
			echo "${HOST}: No such node in base"
			continue
		fi

		sqllist "$NODEDATA" myip myport mykey
		# Log in as ${cbsduser}, exactly like the single-host code path does, so
		# that %r in the ControlPath refers to the same user there and here.
		SSH_ARGS="-l ${cbsduser} -oBatchMode=yes -oStrictHostKeyChecking=no -oConnectTimeout=5 -q -oPort=${myport} -i ${mykey} ${myip}"

		if [ "${tryoffline:-0}" -ne 1 ]; then
			if ! check_locktime ${ftmpdir}/shmux_${myip}.lock >/dev/null 2>&1; then
				echo "Node is offline: ${HOST}"
				continue
			fi
		fi

		# "|| ret=$?" keeps errexit from killing the child on a failed
		# connection, so the remaining hosts are still processed.
		ret=0
		ssh_connect || ret=$?
		# Only the host that was just contacted is marked as alive.
		if [ "${ret}" -eq 0 ]; then
			update_idle "${HOST}"
		fi
		# Without a tty a child handles a single host only.
		if [ "${TTY}" -eq 0 ]; then
			exit 0
		fi
	done
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: CHILD #$CHILDNUM done" 1>&2
	rm -f "$TEMP_DIR/$CHILDNUM.pid" "$TEMP_DIR/$CHILDNUM.active"
}


create_temp() {
	TEMP_DIR=$( /usr/bin/mktemp -d $TEMP_BASE/$(basename $0).XXXXXX) || err 1 "Error mktemp"
}

# Remove the working directory ${TEMP_DIR} with everything in it.
# Returns 1 without touching anything when ${TEMP_DIR} is not a directory,
# otherwise the status of rm (its error output is discarded).
destroy_temp() {
	if [ ! -d "$TEMP_DIR" ]; then
		return 1
	fi
	/bin/rm -rf "$TEMP_DIR" 2>/dev/null
}

########################################
# REMEMBER TO CLEAN UP BEFORE WE PANIC #
########################################
# Signal handler for SIGINT/SIGTERM: send SIGHUP to every child that left a
# pid file in ${TEMP_DIR}, remove the working directory and exit with status 1.
#
# Globals read:    DEBUG, TEMP_DIR
# Globals written: CPIDS, CPID
shutdown() {
	[ "$DEBUG" -ge 1 ] && echo "DEBUG: shutting down children." 1>&2

	# A missing pid file or an already finished child must not abort the
	# handler under errexit, otherwise the temp dir would be left behind.
	CPIDS=""
	if [ -n "${TEMP_DIR}" ]; then
		CPIDS=$( /bin/cat "${TEMP_DIR}"/*.pid 2>/dev/null ) || true
	fi

	for CPID in $CPIDS ; do
		[ "$DEBUG" -ge 2 ] && echo "DEBUG: Killing pid: $CPID" 1>&2
		kill -HUP "${CPID}" 2>/dev/null || true
	done

	[ "$DEBUG" -ge 2 ] && echo "DEBUG: shutting down ssh-agent" 1>&2
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: removing temp dir" 1>&2

	destroy_temp || true

	[ "$DEBUG" -ge 2 ] && echo "DEBUG: done shutting down." 1>&2
	exit 1
}

# Signal handler for SIGQUIT: report on stderr which hosts are being worked on
# (contents of the *.active files) and which are still queued (the hostlist
# file), each host indented by four spaces.
spew_hostlist() {
	{
		echo "HOSTS RUNNING:"
		/bin/cat $TEMP_DIR/*.active 2>/dev/null | sed 's/^/    /'
		echo "HOSTS REMAINING:"
		/bin/cat $TEMP_DIR/hostlist 2>/dev/null | sed 's/^/    /'
	} 1>&2
	return
}


## Main()
# Signal handling: INT/TERM stop the children and clean up, QUIT prints a
# progress report, HUP ends the script quietly.
trap shutdown SIGINT
trap shutdown SIGTERM
trap spew_hostlist SIGQUIT
trap "exit 0" SIGHUP

# Each recognised key=value argument is dropped from the positional
# parameters with "shift"; whatever is left over becomes the remote command.
[ -n "${blocking}" ] && BLOCKING=1 && shift

if [ -n "${quiet}" ]; then
	QUIET=1
	DEBUG=0
	SSH_VERBOSE="-q"
	shift
fi

[ -n "${debug}" ] && DEBUG="${debug}" && shift
[ -n "${node}" ] && HOSTLIST=$( echo "${node}" | tr ',' ' ' ) && shift
[ -n "${cmd}" ] && THESCRIPT="${cmd}" && shift
[ -n "${tty}" ] && TTY=${tty} && shift

if [ -z "${tryoffline}" ]; then
	tryoffline=0
	checkforonline="tryoffline=0"
else
	checkforonline="tryoffline=${tryoffline}"
	shift
fi

if [ -n "${nodelist}" ]; then
	[ ! -f "${nodelist}" ] && err 1 "${nodelist} file doesn't exist"
	# egrep -v exits 1 when nothing is left; "|| true" keeps errexit from
	# ending the script before the "host list is empty" check below.
	HOSTLIST=$( /usr/bin/sed -e 's/#.*//' "${nodelist}" | /usr/bin/egrep -v "^ *$" ) || true
	shift
fi

if [ -n "${netgroup}" ]; then
	# The netgroup name is the value of netgroup=, not a positional argument.
	NETGROUP="${netgroup}"
	NETGROUP_LIST="$( /usr/bin/getent netgroup "${NETGROUP}" | /usr/bin/xargs -n 1 echo | /usr/bin/sed -n '/^(.*,$/s/[,(]//gp')"
	[ -z "${NETGROUP_LIST}" ] && err 1 "Failed to get netgroup: $NETGROUP"
	HOSTLIST="$NETGROUP_LIST $HOSTLIST"
	shift
fi

if [ -n "${cmdfile}" ]; then
	[ ! -e "${cmdfile}" ] && err 1 "myssh: Script File '${cmdfile}' does not exist!"
	THESCRIPT="$THESCRIPT $( /bin/cat "${cmdfile}" )"
	shift
fi

HOSTLIST=$( echo "$HOSTLIST" | /usr/bin/sed -e 's/#.*//' | /usr/bin/egrep -v "^ *$" ) || true
# Collapse the list to single-space separated words, so that a lone host name
# carries no stray blanks when it is used verbatim later on.
HOSTLIST=$( echo ${HOSTLIST} )
[ -z "${HOSTLIST}" ] && err 1 "host list is empty"

COUNT_HOSTS=$( echo "$HOSTLIST" | /usr/bin/wc -w | /usr/bin/awk '{print $1}' )
# One child per host.
CONCURRENT=$COUNT_HOSTS
[ "$COUNT_HOSTS" = "1" ] && MULTIHOST=0
# Neither cmd= nor cmdfile= given: the remaining arguments are the command.
[ -z "${cmd}" -a -z "${cmdfile}" ] && THESCRIPT="$*"

# Single host: run ssh in the foreground and exit with its status.
if [ "${COUNT_HOSTS}" = "1" ]; then
	# tty=0 forces "no tty", any other explicit tty= value forces allocation;
	# without tty= the historical "-t" is kept.
	if [ "${TTY}" = 0 ]; then
		TTYOP="-T"
	elif [ -n "${tty}" ]; then
		TTYOP="-tt"
	else
		TTYOP="-t"
	fi

	# NOTE(review): the host name is pasted into the SQL text unescaped.
	NODEDATA=$( cbsdsql nodes select ip,port,keyfile from nodelist where nodename=\"${HOSTLIST}\" )

	if [ -z "${NODEDATA}" ]; then
		err 1 "${HOSTLIST}: No such node in database"
	fi

	sqllist "$NODEDATA" myip myport mykey

	if [ "${tryoffline}" -ne 1 ]; then
		if ! check_locktime ${ftmpdir}/shmux_${myip}.lock >/dev/null 2>&1; then
			err 1 "Node is offline: ${HOSTLIST}"
		fi
	fi

	SSH_ARGS="-l ${cbsduser} -oBatchMode=yes -oStrictHostKeyChecking=no -oConnectTimeout=5 -q -oPort=${myport} -i ${mykey} ${myip}"
	SSHMUX="${sshdir}/sockets/${cbsduser}@${myip}:${myport}"
	#    [ ! -S "${SSHMUX}" ] && err 1 "No such socket ${SSHMUX}. Please run nodepinger service"

	# "|| ret=$?" records the ssh status instead of letting errexit end the
	# script on the spot; the exit status is the same either way.
	ret=0
	/usr/bin/ssh -oControlPath=${sshdir}/sockets/%r@%h:%p ${SSH_ARGS} ${TTYOP} -C "/usr/local/bin/cbsd -c \"update_idle ${nodename}\"; ${THESCRIPT}" || ret=$?

	if [ "${ret}" -eq 0 ]; then
		update_idle ${HOSTLIST}
	fi
	exit ${ret}
fi

#TTY=1
create_temp

# Fork $CONCURRENT children; each one records its pid in $TEMP_DIR/<n>.pid
set_hostlist
CHILDNUM=1
while [ "${CHILDNUM}" -le "${CONCURRENT}" ] ; do
	run_child ${CHILDNUM} &
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: FORKED CHILD #$CHILDNUM with pid $!" 1>&2
	/bin/rm -f "$TEMP_DIR/$CHILDNUM.pid"
	echo $! > "$TEMP_DIR/$CHILDNUM.pid"
	CHILDNUM=$((CHILDNUM+1))
done

# since trap causes waits to stop waiting with a return value >128
# We need to check that we really meant to stop waiting.
# "|| RETVAL=$?" keeps errexit from ending the script on such an interrupted wait.
RETVAL=0
wait || RETVAL=$?
while [ "$RETVAL" -gt 128 ] ; do
	[ "$DEBUG" -ge 2 ] && echo "DEBUG: wait returned with a value of $RETVAL" 1>&2
	DO_WAIT=0
	for CPID in $( /bin/cat "$TEMP_DIR"/*.pid 2>/dev/null ) ; do
		# "kill -0" only probes for the process and does not need a mounted /proc.
		if kill -0 "${CPID}" 2>/dev/null ; then
			DO_WAIT=1
			[ "$DEBUG" -ge 2 ] && echo "DEBUG: $CPID is still running." 1>&2
		fi
	done
	[ "$DO_WAIT" = 0 ] && break
	RETVAL=0
	wait || RETVAL=$?
done

destroy_temp
