Fixed locking checks
This commit is contained in:
parent
e7e240abf8
commit
c4302a46ef
120
dev/n_osync.sh
120
dev/n_osync.sh
|
@ -4,7 +4,7 @@ PROGRAM="osync" # Rsync based two way sync engine with fault tolerance
|
||||||
AUTHOR="(C) 2013-2016 by Orsiris de Jong"
|
AUTHOR="(C) 2013-2016 by Orsiris de Jong"
|
||||||
CONTACT="http://www.netpower.fr/osync - ozy@netpower.fr"
|
CONTACT="http://www.netpower.fr/osync - ozy@netpower.fr"
|
||||||
PROGRAM_VERSION=1.2-beta2
|
PROGRAM_VERSION=1.2-beta2
|
||||||
PROGRAM_BUILD=2016111601
|
PROGRAM_BUILD=2016111701
|
||||||
IS_STABLE=no
|
IS_STABLE=no
|
||||||
|
|
||||||
# Execution order #__WITH_PARANOIA_DEBUG
|
# Execution order #__WITH_PARANOIA_DEBUG
|
||||||
|
@ -409,27 +409,37 @@ function CreateStateDirs {
|
||||||
|
|
||||||
function _CheckLocksLocal {
|
function _CheckLocksLocal {
|
||||||
local lockfile="${1}"
|
local lockfile="${1}"
|
||||||
__CheckArguments 1 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
local replicaType="${2}"
|
||||||
|
|
||||||
local lockfile_content
|
__CheckArguments 2 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
||||||
local lock_pid
|
|
||||||
local lock_instance_id
|
|
||||||
|
|
||||||
if [ -f "$lockfile" ]; then
|
local lockfileContent
|
||||||
lockfile_content=$(cat $lockfile)
|
local lockPid
|
||||||
Logger "Master lock pid present: $lockfile_content" "DEBUG"
|
local lockInstanceID
|
||||||
lock_pid=${lockfile_content%@*}
|
|
||||||
lock_instance_id=${lockfile_content#*@}
|
if [ -s "$lockfile" ]; then
|
||||||
kill -9 $lock_pid > /dev/null 2>&1
|
lockfileContent=$(cat $lockfile)
|
||||||
|
Logger "Master lock pid present: $lockfileContent" "DEBUG"
|
||||||
|
lockPid=${lockfileContent%@*}
|
||||||
|
if [ $(IsInteger $lockPid) -ne 1 ]; then
|
||||||
|
Logger "Invalid pid [$lockPid] in local replica." "CRITICAL"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
lockInstanceID=${lockfileContent#*@}
|
||||||
|
if [ "$lockInstanceID" == "" ]; then
|
||||||
|
Logger "Invalid instance id [$lockInstanceID] in local replica." "CRITICAL"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
kill -0 $lockPid > /dev/null 2>&1
|
||||||
if [ $? != 0 ]; then
|
if [ $? != 0 ]; then
|
||||||
Logger "There is a dead osync lock in [$lockfile]. Instance [$lock_pid] no longer running. Resuming." "NOTICE"
|
Logger "There is a local dead osync lock with pid [$lockPid]. Instance [$lockInstanceID] no longer running. Resuming." "NOTICE"
|
||||||
#rm "$lockfile"
|
if [ "$replicaType" == "${INITIATOR[$__type]}" ]; then
|
||||||
#if [ $? != 0 ]; then
|
INITIATOR_OVERWRITE_LOCK=true
|
||||||
# Logger "Cannot remove lock in [$lockfile]." "CRITICAL"
|
elif [ "$replicaType" == "${TARGET[$__type]}" ]; then
|
||||||
# exit 1
|
TARGET_OVERWRITE_LOCK=true
|
||||||
#fi
|
fi
|
||||||
else
|
else
|
||||||
Logger "There is already a local instance of osync running [$lock_pid] for this replica. Cannot start." "CRITICAL"
|
Logger "There is already a local instance [$lockInstanceID] of osync running with pid [$lockPid] for this replica. Cannot start." "CRITICAL"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
@ -440,9 +450,9 @@ function _CheckLocksRemote {
|
||||||
__CheckArguments 1 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
__CheckArguments 1 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
||||||
|
|
||||||
local cmd
|
local cmd
|
||||||
local lock_pid
|
local lockPid
|
||||||
local lock_instance_id
|
local lockInstanceID
|
||||||
local lockfile_content
|
local lockfileContent
|
||||||
|
|
||||||
CheckConnectivity3rdPartyHosts
|
CheckConnectivity3rdPartyHosts
|
||||||
CheckConnectivityRemoteHost
|
CheckConnectivityRemoteHost
|
||||||
|
@ -451,34 +461,42 @@ function _CheckLocksRemote {
|
||||||
Logger "cmd: $cmd" "DEBUG"
|
Logger "cmd: $cmd" "DEBUG"
|
||||||
eval "$cmd"
|
eval "$cmd"
|
||||||
if [ $? != 0 ]; then
|
if [ $? != 0 ]; then
|
||||||
if [ -f "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID" ]; then
|
Logger "Cannot check remote replica lock." "CRITICAL"
|
||||||
lockfile_content=$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID)
|
exit 1
|
||||||
else
|
|
||||||
Logger "Cannot get remote lockfile." "CRITICAL"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
lock_pid=${lockfile_content%@*}
|
if [ -s "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID" ]; then
|
||||||
lock_instance_id=${lockfile_content#*@}
|
lockfileContent="$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID)"
|
||||||
|
|
||||||
if [ "$lock_pid" != "" ] && [ "$lock_instance_id" != "" ]; then
|
lockPid=${lockfileContent%@*}
|
||||||
Logger "Remote lock is: $lock_pid@$lock_instance_id" "DEBUG"
|
if [ $(IsInteger $lockPid) -ne 1 ]; then
|
||||||
|
Logger "Invalid pid [$lockPid] in remote replica lock." "CRITICAL"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
lockInstanceID=${lockfileContent#*@}
|
||||||
|
if [ "$lockInstanceID" == "" ]; then
|
||||||
|
Logger "Invalid instance id [$lockInstanceID] in remote replica." "CRITICAL"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
kill -0 $lock_pid > /dev/null 2>&1
|
Logger "Remote lock is: $lockPid@$lockInstanceID" "DEBUG"
|
||||||
|
|
||||||
|
kill -0 $lockPid > /dev/null 2>&1
|
||||||
if [ $? != 0 ]; then
|
if [ $? != 0 ]; then
|
||||||
if [ "$lock_instance_id" == "$INSTANCE_ID" ]; then
|
if [ "$lockInstanceID" == "$INSTANCE_ID" ]; then
|
||||||
Logger "There is a dead osync lock on target replica that corresponds to this initiator sync id [$lock_instance_id]. Instance [$lock_pid] no longer running. Resuming." "NOTICE"
|
Logger "There is a remote dead osync lock on target replica that corresponds to this initiator sync id [$lockInstanceID]. Pid [$lockPid] no longer running. Resuming." "NOTICE"
|
||||||
|
TARGET_OVERWRITE_LOCK=true
|
||||||
else
|
else
|
||||||
if [ "$FORCE_STRANGER_LOCK_RESUME" == "yes" ]; then
|
if [ "$FORCE_STRANGER_LOCK_RESUME" == "yes" ]; then
|
||||||
Logger "WARNING: There is a dead osync lock on target replica that does not correspond to this initiator sync-id [$lock_instance_id]. Forcing resume." "WARN"
|
Logger "There is a remote dead osync lock on target replica that does not correspond to this initiator sync-id [$lockInstanceID]. Forcing resume." "WARN"
|
||||||
|
TARGET_OVERWRITE_LOCK=true
|
||||||
else
|
else
|
||||||
Logger "There is a dead osync lock on target replica that does not correspond to this initiator sync-id [$lock_instance_id]. Will not resume." "CRITICAL"
|
Logger "There is a remote dead osync lock on target replica that does not correspond to this initiator sync-id [$lockInstanceID]. Will not resume." "CRITICAL"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
Logger "There is already a local instance of osync that locks target replica [$lock_pid@$lock_instance_id]. Cannot start." "CRITICAL"
|
Logger "There is already a local instance of osync that locks target replica [$lockPid@$lockInstanceID]. Cannot start." "CRITICAL"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
@ -500,10 +518,10 @@ function CheckLocks {
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
_CheckLocksLocal "${INITIATOR[$__lockFile]}" &
|
_CheckLocksLocal "${INITIATOR[$__lockFile]}" "${INITIATOR[$__type]}" &
|
||||||
pids="$!"
|
pids="$!"
|
||||||
if [ "$REMOTE_OPERATION" != "yes" ]; then
|
if [ "$REMOTE_OPERATION" != "yes" ]; then
|
||||||
_CheckLocksLocal "${TARGET[$__lockFile]}" &
|
_CheckLocksLocal "${TARGET[$__lockFile]}" "${INITIATOR[$__type]}" &
|
||||||
pids="$pids;$!"
|
pids="$pids;$!"
|
||||||
else
|
else
|
||||||
_CheckLocksRemote "${TARGET[$__lockFile]}" &
|
_CheckLocksRemote "${TARGET[$__lockFile]}" &
|
||||||
|
@ -521,10 +539,14 @@ function CheckLocks {
|
||||||
function _WriteLockFilesLocal {
|
function _WriteLockFilesLocal {
|
||||||
local lockfile="${1}"
|
local lockfile="${1}"
|
||||||
local replicaType="${2}"
|
local replicaType="${2}"
|
||||||
__CheckArguments 2 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
local overwrite="${3:-false}"
|
||||||
|
|
||||||
|
__CheckArguments 3 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
||||||
|
|
||||||
(
|
(
|
||||||
set -o noclobber
|
if [ $overwrite == true ]; then
|
||||||
|
set -o noclobber
|
||||||
|
fi
|
||||||
$COMMAND_SUDO echo "$SCRIPT_PID@$INSTANCE_ID" > "$lockfile" 2> "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}-$replicaType.$SCRIPT_PID"
|
$COMMAND_SUDO echo "$SCRIPT_PID@$INSTANCE_ID" > "$lockfile" 2> "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}-$replicaType.$SCRIPT_PID"
|
||||||
)
|
)
|
||||||
if [ $? != 0 ]; then
|
if [ $? != 0 ]; then
|
||||||
|
@ -539,19 +561,23 @@ function _WriteLockFilesLocal {
|
||||||
function _WriteLockFilesRemote {
|
function _WriteLockFilesRemote {
|
||||||
local lockfile="${1}"
|
local lockfile="${1}"
|
||||||
local replicaType="${2}"
|
local replicaType="${2}"
|
||||||
__CheckArguments 2 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
local overwrite="${3-false}"
|
||||||
|
|
||||||
|
__CheckArguments 3 $# "${FUNCNAME[0]}" "$@" #__WITH_PARANOIA_DEBUG
|
||||||
|
|
||||||
local cmd
|
local cmd
|
||||||
|
|
||||||
CheckConnectivity3rdPartyHosts
|
CheckConnectivity3rdPartyHosts
|
||||||
CheckConnectivityRemoteHost
|
CheckConnectivityRemoteHost
|
||||||
|
|
||||||
cmd=$SSH_CMD' "( set -o noclobber; echo '$SCRIPT_PID@$INSTANCE_ID' | '$COMMAND_SUDO' tee \"'$lockfile'\")" > /dev/null 2> $RUN_DIR/$PROGRAM._WriteLockFilesRemote.$replicaType.$SCRIPT_PID'
|
cmd=$SSH_CMD' "( if [ $overwrite == true ]; then set -o noclobber; fi; echo '$SCRIPT_PID@$INSTANCE_ID' | '$COMMAND_SUDO' tee \"'$lockfile'\")" > /dev/null 2> $RUN_DIR/$PROGRAM._WriteLockFilesRemote.$replicaType.$SCRIPT_PID'
|
||||||
|
#WIP
|
||||||
|
#cmd=$SSH_CMD' "( set -o noclobber; echo '$SCRIPT_PID@$INSTANCE_ID' | '$COMMAND_SUDO' tee \"'$lockfile'\")" > /dev/null 2> $RUN_DIR/$PROGRAM._WriteLockFilesRemote.$replicaType.$SCRIPT_PID'
|
||||||
Logger "cmd: $cmd" "DEBUG"
|
Logger "cmd: $cmd" "DEBUG"
|
||||||
eval "$cmd"
|
eval "$cmd"
|
||||||
if [ $? != 0 ]; then
|
if [ $? != 0 ]; then
|
||||||
Logger "Could not create lock file on remote $replicaType in [$lockfile]." "CRITICAL"
|
Logger "Could not create lock file on remote $replicaType in [$lockfile]." "CRITICAL"
|
||||||
Loggxer "Command output:\n$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$replicaType.$SCRIPT_PID)" "NOTICE"
|
Logger "Command output:\n$(cat $RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$replicaType.$SCRIPT_PID)" "NOTICE"
|
||||||
return 1
|
return 1
|
||||||
else
|
else
|
||||||
Logger "Locked remote $replicaType replica in [$lockfile]." "DEBUG"
|
Logger "Locked remote $replicaType replica in [$lockfile]." "DEBUG"
|
||||||
|
@ -566,14 +592,14 @@ function WriteLockFiles {
|
||||||
local pidArray
|
local pidArray
|
||||||
local pid
|
local pid
|
||||||
|
|
||||||
_WriteLockFilesLocal "${INITIATOR[$__lockFile]}" "${INITIATOR[$__type]}"&
|
_WriteLockFilesLocal "${INITIATOR[$__lockFile]}" "${INITIATOR[$__type]}" $INITIATOR_LOCK_OVERWRITE &
|
||||||
initiatorPid="$!"
|
initiatorPid="$!"
|
||||||
|
|
||||||
if [ "$REMOTE_OPERATION" != "yes" ]; then
|
if [ "$REMOTE_OPERATION" != "yes" ]; then
|
||||||
_WriteLockFilesLocal "${TARGET[$__lockFile]}" "${TARGET[$__type]}" &
|
_WriteLockFilesLocal "${TARGET[$__lockFile]}" "${TARGET[$__type]}" $TARGET_LOCK_OVERWRITE &
|
||||||
targetPid="$!"
|
targetPid="$!"
|
||||||
else
|
else
|
||||||
_WriteLockFilesRemote "${TARGET[$__lockFile]}" "${TARGET[$__type]}" &
|
_WriteLockFilesRemote "${TARGET[$__lockFile]}" "${TARGET[$__type]}" $TARGET_LOCK_OVERWRITE &
|
||||||
targetPid="$!"
|
targetPid="$!"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue