Fixed locking checks

This commit is contained in:
deajan 2016-11-17 12:13:56 +01:00
parent e7e240abf8
commit c4302a46ef
1 changed file with 73 additions and 47 deletions

View File

@ -4,7 +4,7 @@ PROGRAM="osync" # Rsync based two way sync engine with fault tolerance
AUTHOR="(C) 2013-2016 by Orsiris de Jong" AUTHOR="(C) 2013-2016 by Orsiris de Jong"
CONTACT="http://www.netpower.fr/osync - ozy@netpower.fr" CONTACT="http://www.netpower.fr/osync - ozy@netpower.fr"
PROGRAM_VERSION=1.2-beta2 PROGRAM_VERSION=1.2-beta2
PROGRAM_BUILD=2016111601 PROGRAM_BUILD=2016111701
IS_STABLE=no IS_STABLE=no
# Execution order #__WITH_PARANOIA_DEBUG # Execution order #__WITH_PARANOIA_DEBUG
@ -409,27 +409,37 @@ function CreateStateDirs {
function _CheckLocksLocal { function _CheckLocksLocal {
local lockfile="${1}" local lockfile="${1}"
__CheckArguments 1 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG local replicaType="${2}"
local lockfile_content __CheckArguments 2 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
local lock_pid
local lock_instance_id
if [ -f "$lockfile" ]; then local lockfileContent
lockfile_content=$(cat $lockfile) local lockPid
Logger "Master lock pid present: $lockfile_content" "DEBUG" local lockInstanceID
lock_pid=${lockfile_content%@*}
lock_instance_id=${lockfile_content#*@} if [ -s "$lockfile" ]; then
kill -9 $lock_pid > /dev/null 2>&1 lockfileContent=$(cat $lockfile)
Logger "Master lock pid present: $lockfileContent" "DEBUG"
lockPid=${lockfileContent%@*}
if [ $(IsInteger $lockPid) -ne 1 ]; then
Logger "Invalid pid [$lockPid] in local replica." "CRITICAL"
exit 1
fi
lockInstanceID=${lockfileContent#*@}
if [ "$lockInstanceID" == "" ]; then
Logger "Invalid instance id [$lockInstanceID] in local replica." "CRITICAL"
exit 1
fi
kill -0 $lockPid > /dev/null 2>&1
if [ $? != 0 ]; then if [ $? != 0 ]; then
Logger "There is a dead osync lock in [$lockfile]. Instance [$lock_pid] no longer running. Resuming." "NOTICE" Logger "There is a local dead osync lock with pid [$lockPid]. Instance [$lockInstanceID] no longer running. Resuming." "NOTICE"
#rm "$lockfile" if [ "$replicaType" == "${INITIATOR[$__type]}" ]; then
#if [ $? != 0 ]; then INITIATOR_OVERWRITE_LOCK=true
# Logger "Cannot remove lock in [$lockfile]." "CRITICAL" elif [ "$replicaType" == "${TARGET[$__type]}" ]; then
# exit 1 TARGET_OVERWRITE_LOCK=true
#fi fi
else else
Logger "There is already a local instance of osync running [$lock_pid] for this replica. Cannot start." "CRITICAL" Logger "There is already a local instance [$lockInstanceID] of osync running with pid [$lockPid] for this replica. Cannot start." "CRITICAL"
exit 1 exit 1
fi fi
fi fi
@ -440,9 +450,9 @@ function _CheckLocksRemote {
__CheckArguments 1 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG __CheckArguments 1 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
local cmd local cmd
local lock_pid local lockPid
local lock_instance_id local lockInstanceID
local lockfile_content local lockfileContent
CheckConnectivity3rdPartyHosts CheckConnectivity3rdPartyHosts
CheckConnectivityRemoteHost CheckConnectivityRemoteHost
@ -451,34 +461,42 @@ function _CheckLocksRemote {
Logger "cmd: $cmd" "DEBUG" Logger "cmd: $cmd" "DEBUG"
eval "$cmd" eval "$cmd"
if [ $? != 0 ]; then if [ $? != 0 ]; then
if [ -f "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID" ]; then Logger "Cannot check remote replica lock." "CRITICAL"
lockfile_content=$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID) exit 1
else
Logger "Cannot get remote lockfile." "CRITICAL"
exit 1
fi
fi fi
lock_pid=${lockfile_content%@*} if [ -s "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID" ]; then
lock_instance_id=${lockfile_content#*@} lockfileContent="$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID)"
if [ "$lock_pid" != "" ] && [ "$lock_instance_id" != "" ]; then lockPid=${lockfileContent%@*}
Logger "Remote lock is: $lock_pid@$lock_instance_id" "DEBUG" if [ $(IsInteger $lockPid) -ne 1 ]; then
Logger "Invalid pid [$lockPid] in remote replica lock." "CRITICAL"
exit 1
fi
lockInstanceID=${lockfileContent#*@}
if [ "$lockInstanceID" == "" ]; then
Logger "Invalid instance id [$lockInstanceID] in remote replica." "CRITICAL"
exit 1
fi
kill -0 $lock_pid > /dev/null 2>&1 Logger "Remote lock is: $lockPid@$lockInstanceID" "DEBUG"
kill -0 $lockPid > /dev/null 2>&1
if [ $? != 0 ]; then if [ $? != 0 ]; then
if [ "$lock_instance_id" == "$INSTANCE_ID" ]; then if [ "$lockInstanceID" == "$INSTANCE_ID" ]; then
Logger "There is a dead osync lock on target replica that corresponds to this initiator sync id [$lock_instance_id]. Instance [$lock_pid] no longer running. Resuming." "NOTICE" Logger "There is a remote dead osync lock on target replica that corresponds to this initiator sync id [$lockInstanceID]. Pid [$lockPid] no longer running. Resuming." "NOTICE"
TARGET_OVERWRITE_LOCK=true
else else
if [ "$FORCE_STRANGER_LOCK_RESUME" == "yes" ]; then if [ "$FORCE_STRANGER_LOCK_RESUME" == "yes" ]; then
Logger "WARNING: There is a dead osync lock on target replica that does not correspond to this initiator sync-id [$lock_instance_id]. Forcing resume." "WARN" Logger "There is a remote dead osync lock on target replica that does not correspond to this initiator sync-id [$lockInstanceID]. Forcing resume." "WARN"
TARGET_OVERWRITE_LOCK=true
else else
Logger "There is a dead osync lock on target replica that does not correspond to this initiator sync-id [$lock_instance_id]. Will not resume." "CRITICAL" Logger "There is a remote dead osync lock on target replica that does not correspond to this initiator sync-id [$lockInstanceID]. Will not resume." "CRITICAL"
exit 1 exit 1
fi fi
fi fi
else else
Logger "There is already a local instance of osync that locks target replica [$lock_pid@$lock_instance_id]. Cannot start." "CRITICAL" Logger "There is already a local instance of osync that locks target replica [$lockPid@$lockInstanceID]. Cannot start." "CRITICAL"
exit 1 exit 1
fi fi
fi fi
@ -500,10 +518,10 @@ function CheckLocks {
exit 1 exit 1
fi fi
else else
_CheckLocksLocal "${INITIATOR[$__lockFile]}" & _CheckLocksLocal "${INITIATOR[$__lockFile]}" "${INITIATOR[$__type]}" &
pids="$!" pids="$!"
if [ "$REMOTE_OPERATION" != "yes" ]; then if [ "$REMOTE_OPERATION" != "yes" ]; then
_CheckLocksLocal "${TARGET[$__lockFile]}" & _CheckLocksLocal "${TARGET[$__lockFile]}" "${INITIATOR[$__type]}" &
pids="$pids;$!" pids="$pids;$!"
else else
_CheckLocksRemote "${TARGET[$__lockFile]}" & _CheckLocksRemote "${TARGET[$__lockFile]}" &
@ -521,10 +539,14 @@ function CheckLocks {
function _WriteLockFilesLocal { function _WriteLockFilesLocal {
local lockfile="${1}" local lockfile="${1}"
local replicaType="${2}" local replicaType="${2}"
__CheckArguments 2 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG local overwrite="${3:-false}"
__CheckArguments 3 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
( (
set -o noclobber if [ $overwrite == true ]; then
set -o noclobber
fi
$COMMAND_SUDO echo "$SCRIPT_PID@$INSTANCE_ID" > "$lockfile" 2> "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}-$replicaType.$SCRIPT_PID" $COMMAND_SUDO echo "$SCRIPT_PID@$INSTANCE_ID" > "$lockfile" 2> "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}-$replicaType.$SCRIPT_PID"
) )
if [ $? != 0 ]; then if [ $? != 0 ]; then
@ -539,19 +561,23 @@ function _WriteLockFilesLocal {
function _WriteLockFilesRemote { function _WriteLockFilesRemote {
local lockfile="${1}" local lockfile="${1}"
local replicaType="${2}" local replicaType="${2}"
__CheckArguments 2 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG local overwrite="${3-false}"
__CheckArguments 3 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
local cmd local cmd
CheckConnectivity3rdPartyHosts CheckConnectivity3rdPartyHosts
CheckConnectivityRemoteHost CheckConnectivityRemoteHost
cmd=$SSH_CMD' "( set -o noclobber; echo '$SCRIPT_PID@$INSTANCE_ID' | '$COMMAND_SUDO' tee \"'$lockfile'\")" > /dev/null 2> $RUN_DIR/$PROGRAM._WriteLockFilesRemote.$replicaType.$SCRIPT_PID' cmd=$SSH_CMD' "( if [ $overwrite == true ]; then set -o noclobber; fi; echo '$SCRIPT_PID@$INSTANCE_ID' | '$COMMAND_SUDO' tee \"'$lockfile'\")" > /dev/null 2> $RUN_DIR/$PROGRAM._WriteLockFilesRemote.$replicaType.$SCRIPT_PID'
#WIP
#cmd=$SSH_CMD' "( set -o noclobber; echo '$SCRIPT_PID@$INSTANCE_ID' | '$COMMAND_SUDO' tee \"'$lockfile'\")" > /dev/null 2> $RUN_DIR/$PROGRAM._WriteLockFilesRemote.$replicaType.$SCRIPT_PID'
Logger "cmd: $cmd" "DEBUG" Logger "cmd: $cmd" "DEBUG"
eval "$cmd" eval "$cmd"
if [ $? != 0 ]; then if [ $? != 0 ]; then
Logger "Could not create lock file on remote $replicaType in [$lockfile]." "CRITICAL" Logger "Could not create lock file on remote $replicaType in [$lockfile]." "CRITICAL"
Loggxer "Command output:\n$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$replicaType.$SCRIPT_PID)" "NOTICE" Logger "Command output:\n$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$replicaType.$SCRIPT_PID)" "NOTICE"
return 1 return 1
else else
Logger "Locked remote $replicaType replica in [$lockfile]." "DEBUG" Logger "Locked remote $replicaType replica in [$lockfile]." "DEBUG"
@ -566,14 +592,14 @@ function WriteLockFiles {
local pidArray local pidArray
local pid local pid
_WriteLockFilesLocal "${INITIATOR[$__lockFile]}" "${INITIATOR[$__type]}"& _WriteLockFilesLocal "${INITIATOR[$__lockFile]}" "${INITIATOR[$__type]}" $INITIATOR_LOCK_OVERWRITE &
initiatorPid="$!" initiatorPid="$!"
if [ "$REMOTE_OPERATION" != "yes" ]; then if [ "$REMOTE_OPERATION" != "yes" ]; then
_WriteLockFilesLocal "${TARGET[$__lockFile]}" "${TARGET[$__type]}" & _WriteLockFilesLocal "${TARGET[$__lockFile]}" "${TARGET[$__type]}" $TARGET_LOCK_OVERWRITE &
targetPid="$!" targetPid="$!"
else else
_WriteLockFilesRemote "${TARGET[$__lockFile]}" "${TARGET[$__type]}" & _WriteLockFilesRemote "${TARGET[$__lockFile]}" "${TARGET[$__type]}" $TARGET_LOCK_OVERWRITE &
targetPid="$!" targetPid="$!"
fi fi