[generate_dump] [Mellanox] Fix the duplicate dfw dump collection problem by adding symlinks (sonic-net#2536)

- What I did
Currently, the dfw dumps, which are usually saved under /var/log/mellanox/sdk-dumps, are collected twice in the techsupport dump: once under the log/ folder and once under the sai_sdk_dump/ folder.

Fixed this by creating a symbolic link sai_sdk_dump/sai-dfw-xxxxxxxxx.tar.gz -> ../log/sai-dfw-xxxxxxxxx.tar.gz instead of storing a second copy.

- How I did it
dfw dumps are currently copied from the syncd container; the logic is updated to collect the files from the host instead when SAI_DUMP_STORE_PATH is mounted on the host.
Fixed the duplicate dfw dump collection by adding a relative symbolic link from the sai_sdk_dump/ folder to the log/ folder (see the sketch below).
dfw dump collection is moved to a new function, collect_mellanox_dfw_dumps, which is run at the end, i.e. after the files under /var/log have been saved.
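
A minimal sketch of the resulting layout inside the dump staging directory, assuming $TARDIR is the staging dir used by generate_dump and reusing the archive name from the verification output below:

# Illustrative only: the dfw archive is stored once under log/ and merely linked from sai_sdk_dump/
mkdir -p "$TARDIR/sai_sdk_dump"
pushd "$TARDIR/sai_sdk_dump"
ln -s ../log/sai-dfw-1669685690.tar.gz sai-dfw-1669685690.tar.gz
popd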

- How to verify it
root@switch:/home/admin# show techsupport --verbose
root@switch:/home/admin/sonic_dump_r-lionfish-13_20221202_081958/log# ls -Al | grep dfw
-rw-r--r-- 1 root root  1841061 Dec  2 08:21 sai-dfw-1669685690.tar.gz
root@switch:/home/admin/sonic_dump_r-lionfish-13_20221202_081958/sai_sdk_dump# ls -Al
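
The entry under sai_sdk_dump/ is expected to be a relative symlink into ../log/ rather than a second copy of the archive; one way to check the link target from inside sai_sdk_dump/ (illustrative, archive name taken from the log/ listing above):

readlink sai-dfw-1669685690.tar.gz    # expected: ../log/sai-dfw-1669685690.tar.gz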

Signed-off-by: Vivek Reddy Karri <vkarri@nvidia.com>
vivekrnv authored and preetham-singh committed Dec 6, 2022
1 parent 62dc883 commit f145b9a
Showing 1 changed file with 94 additions and 10 deletions.
104 changes: 94 additions & 10 deletions scripts/generate_dump
@@ -993,6 +993,49 @@ enable_logrotate() {
    sed -i '/\/usr\/sbin\/logrotate/s/^#*//g' /etc/cron.d/logrotate
}

###############################################################################
# Create a relative symbolic link of an existing file
# Globals:
# BASE
# MKDIR
# TAR
# TARFILE
# DUMPDIR
# V
# RM
# NOOP
# Arguments:
# filename: the full path of the file
# dest_dir: destination dir where the link is created
# src_dir: directory under $TARDIR where the actual file exists
# Returns:
# None
###############################################################################
save_symlink() {
    trap 'handle_error $? $LINENO' ERR
    local start_t=$(date +%s%3N)
    local end_t=0
    local filename=$1
    local dest_dir=$2
    local src_dir=$3
    local do_tar_append=${4:-true}
    local file_basename=$(basename $filename)
    local tar_path="$BASE/$dest_dir/$file_basename"

    $MKDIR $V -p "$TARDIR/$dest_dir"

    # Create a relative link under $dest_dir pointing at the copy already saved under $src_dir
    ${CMD_PREFIX}pushd $TARDIR/$dest_dir
    ${CMD_PREFIX}ln -s ../$src_dir/$file_basename $file_basename
    ${CMD_PREFIX}popd

    # Append the link to the techsupport tarball and drop it from the staging dir
    if $do_tar_append; then
        ($TAR $V -rf $TARFILE -C $DUMPDIR "$tar_path" \
            || abort "${EXT_PROCFS_SAVE_FAILED}" "tar append operation failed. Aborting to prevent data loss.") \
            && $RM $V -f "$DUMPDIR/$tar_path"
    fi
    end_t=$(date +%s%3N)
    echo "[ save_symlink:$filename] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO
}
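
A hedged usage sketch of the helper above, assuming a dfw archive that has already been saved under log/ in the staging directory (path taken from the issue description, archive name from the verification output):

# Link sai_sdk_dump/sai-dfw-1669685690.tar.gz -> ../log/sai-dfw-1669685690.tar.gz
# and append the link to the techsupport tarball (do_tar_append defaults to true)
save_symlink /var/log/mellanox/sdk-dumps/sai-dfw-1669685690.tar.gz sai_sdk_dump log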

###############################################################################
# Collect Mellanox specific information
@@ -1025,16 +1068,6 @@ collect_mellanox() {
    ${CMD_PREFIX}rm -rf $sai_dump_folder
    ${CMD_PREFIX}docker exec syncd rm -rf $sai_dump_folder

    # Save SDK error dumps
    local sdk_dump_path=`${CMD_PREFIX}docker exec syncd cat /tmp/sai.profile|grep "SAI_DUMP_STORE_PATH"|cut -d = -f2`
    if [[ -d $sdk_dump_path ]]; then
        copy_from_docker syncd $sdk_dump_path /tmp/sdk-dumps
        for file in $(find /tmp/sdk-dumps -type f); do
            save_file ${file} sai_sdk_dump false
        done
        rm -rf /tmp/sdk-dumps
    fi

    # run 'hw-management-generate-dump.sh' script and save the result file
    HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
    if [ -f "$HW_DUMP_FILE" ]; then
@@ -1056,6 +1089,53 @@

}

###############################################################################
# Collect dfw dumps if any. Applies to only MLNX platform
# Globals:
# CMD_PREFIX
# Arguments:
# None
# Returns:
# None
###############################################################################
collect_mellanox_dfw_dumps() {
    trap 'handle_error $? $LINENO' ERR
    local platform=$(python3 -c "from sonic_py_common import device_info; print(device_info.get_platform())")
    local hwsku=$(python3 -c "from sonic_py_common import device_info; print(device_info.get_hwsku())")
    local sdk_dump_path=`cat /usr/share/sonic/device/${platform}/${hwsku}/sai.profile|grep "SAI_DUMP_STORE_PATH"|cut -d = -f2`

    if [[ ! -d $sdk_dump_path ]]; then
        # This means SAI_DUMP_STORE_PATH is not mounted on the host and is only accessible through the container.
        # This is a bad design and not recommended, but nothing prevents it, hence the special handling.
        if [[ "$( docker container inspect -f '{{.State.Running}}' syncd )" == "true" ]]; then
            $RM $V -rf /tmp/dfw-sdk-dumps
            $MKDIR $V -p /tmp/dfw-sdk-dumps
            copy_from_docker syncd $sdk_dump_path /tmp/dfw-sdk-dumps
        else
            echo "ERROR: dfw dumps cannot be collected"
        fi
        sdk_dump_path="/tmp/dfw-sdk-dumps"
    fi

    for file in $(find_files "$sdk_dump_path"); do
        if $TAR -tf $TARFILE | grep $BASE/log/$(basename $file); then
            # If this path sits under the "/var/log/" dir, the files
            # have already been collected, so just add a symlink
            if [ ! -z "${file##*.gz}" ]; then
                # files saved under log/ are zipped with gz
                file=$file.gz
            fi
            ${CMD_PREFIX}save_symlink ${file} sai_sdk_dump log
        else
            if [ ! -z "${file##*.gz}" ]; then
                ${CMD_PREFIX}save_file ${file} sai_sdk_dump true
            else
                ${CMD_PREFIX}save_file ${file} sai_sdk_dump false
            fi
        fi
    done
}
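
For context, the sdk_dump_path parsing above assumes a sai.profile entry of the following form; the exact path is platform-specific, and the value shown here is the one mentioned in the issue description:

# Hypothetical sai.profile fragment
SAI_DUMP_STORE_PATH=/var/log/mellanox/sdk-dumps

# The grep|cut pipeline extracts just the value, e.g.:
grep "SAI_DUMP_STORE_PATH" sai.profile | cut -d = -f2    # -> /var/log/mellanox/sdk-dumps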

###############################################################################
# Collect Broadcom specific information
# Globals:
@@ -1626,6 +1706,10 @@ main() {
    save_crash_files
    save_warmboot_files

    if [[ "$asic" = "mellanox" ]]; then
        collect_mellanox_dfw_dumps
    fi

    finalize
}
