#!python
#
# determine-reboot-cause
#
# Program designed to run once, soon after system boot which will
# determine the cause of the previous reboot and store it to the disk,
#

try:
    import datetime
    import json
    import os
    import pwd
    import re
    import sys

    from sonic_py_common import device_info, syslogger

except ImportError as err:
    raise ImportError("%s - required module not found" % str(err))

VERSION = "1.0"

SYSLOG_IDENTIFIER = "determine-reboot-cause"

REBOOT_CAUSE_DIR = "/host/reboot-cause/"
REBOOT_CAUSE_MODULE_DIR = "/host/reboot-cause/module"
REBOOT_CAUSE_HISTORY_DIR = "/host/reboot-cause/history/"
REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR, "reboot-cause.txt")
PREVIOUS_REBOOT_CAUSE_FILE = os.path.join(REBOOT_CAUSE_DIR,  "previous-reboot-cause.json")
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script which is in sonic-utilities
# Because the system can be rebooted from some old versions, we have to take all possible BOOT options into consideration.
# On 201803, 201807 we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot"
# On 201811 and later we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast
# To extract the commom part of them, we should have the following PATTERN
REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"
REBOOT_TYPE_KEXEC_PATTERN_EXPRESS = ".*SONIC_BOOT_TYPE=(express).*"

REBOOT_CAUSE_UNKNOWN = "Unknown"
REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware"
REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other"
REBOOT_CAUSE_HEARTBEAT_LOSS = "Heartbeat with the Supervisor card lost"

# Global logger class instance
sonic_logger = syslogger.SysLogger(SYSLOG_IDENTIFIER)


# ============================= Functions =============================
def parse_warmfast_reboot_from_proc_cmdline():
    if os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
        with open(REBOOT_TYPE_KEXEC_FILE) as cause_file:
            cause_file_kexec = cause_file.readline()
        m = re.search(REBOOT_TYPE_KEXEC_PATTERN_WARM, cause_file_kexec)
        if m and m.group(1):
            return 'warm-reboot'
        m = re.search(REBOOT_TYPE_KEXEC_PATTERN_FAST, cause_file_kexec)
        if m and m.group(1):
            return 'fast-reboot'
        m = re.search(REBOOT_TYPE_KEXEC_PATTERN_EXPRESS, cause_file_kexec)
        if m and m.group(1):
            return 'express-reboot'
    return None


def find_software_reboot_cause_from_reboot_cause_file():
    software_reboot_cause = REBOOT_CAUSE_UNKNOWN
    if os.path.isfile(REBOOT_CAUSE_FILE):
        with open(REBOOT_CAUSE_FILE) as cause_file:
            software_reboot_cause = cause_file.readline().rstrip('\n')
            sonic_logger.log_info("{} indicates the reboot cause: {}".format(REBOOT_CAUSE_FILE, software_reboot_cause))
    else:
        sonic_logger.log_info("Reboot cause file {} not found".format(REBOOT_CAUSE_FILE))
    return software_reboot_cause


def find_first_boot_version():
    build_version = "unknown"
    version_info = device_info.get_sonic_version_info()
    if version_info:
        build_version = version_info['build_version']
    return " (First boot of SONiC version {})".format(build_version)


def find_software_reboot_cause():
    software_reboot_cause = find_software_reboot_cause_from_reboot_cause_file()
    if software_reboot_cause == REBOOT_CAUSE_UNKNOWN:
        if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
            software_reboot_cause += find_first_boot_version()
            os.remove(FIRST_BOOT_PLATFORM_FILE)
    return software_reboot_cause


def find_proc_cmdline_reboot_cause():
    proc_cmdline_reboot_cause = parse_warmfast_reboot_from_proc_cmdline()

    if proc_cmdline_reboot_cause:
        sonic_logger.log_info("/proc/cmdline indicates reboot type: {}".format(proc_cmdline_reboot_cause))
    else:
        sonic_logger.log_info("No reboot cause found from /proc/cmdline")

    return proc_cmdline_reboot_cause


def get_reboot_cause_from_platform():
    # Find hardware reboot cause using sonic_platform library
    try:
        import sonic_platform
        platform  = sonic_platform.platform.Platform()
        chassis  = platform.get_chassis()
        hardware_reboot_cause_major, hardware_reboot_cause_minor = chassis.get_reboot_cause()
        sonic_logger.log_info("Platform api returns reboot cause {}, {}".format(hardware_reboot_cause_major, hardware_reboot_cause_minor))
    except (ImportError, FileNotFoundError, NotImplementedError):
        sonic_logger.log_warning("sonic_platform package not installed or not implemented. Unable to detect hardware reboot causes.")
        hardware_reboot_cause_major, hardware_reboot_cause_minor = REBOOT_CAUSE_NON_HARDWARE, "N/A"

    return hardware_reboot_cause_major, hardware_reboot_cause_minor


def find_hardware_reboot_cause():
    hardware_reboot_cause_major, hardware_reboot_cause_minor = get_reboot_cause_from_platform()
    if hardware_reboot_cause_major:
        sonic_logger.log_info("Platform api indicates reboot cause {}".format(hardware_reboot_cause_major))
    else:
        sonic_logger.log_info("No reboot cause found from platform api")

    hardware_reboot_cause_minor_str = ""
    if hardware_reboot_cause_minor:
        hardware_reboot_cause_minor_str = " ({})".format(hardware_reboot_cause_minor)

    hardware_reboot_cause = hardware_reboot_cause_major + hardware_reboot_cause_minor_str

    return hardware_reboot_cause


def get_reboot_cause_dict(previous_reboot_cause, comment, gen_time):
    """Store the key information of device reboot into a dictionary by parsing the string in
    previous_reboot_cause.

    If user issued a command to reboot device, then user, command and time will be
    stored into a dictionary.

    If device was rebooted due to the kernel panic, then the string `Kernel Panic`
    and time will be stored into a dictionary.
    """
    reboot_cause_dict = {}
    reboot_cause_dict['gen_time'] = gen_time
    reboot_cause_dict['cause'] = previous_reboot_cause
    reboot_cause_dict['user'] = "N/A"
    reboot_cause_dict['time'] = "N/A"
    reboot_cause_dict['comment'] = comment if comment is not None else "N/A"

    if re.search(r'User issued', previous_reboot_cause):
        # Match with "User issued '{}' command [User: {}, Time: {}]"
        match = re.search(r'User issued \'(.*)\' command \[User: (.*), Time: (.*)\]', previous_reboot_cause)
        if match is not None:
            reboot_cause_dict['cause'] = match.group(1)
            reboot_cause_dict['user'] = match.group(2)
            reboot_cause_dict['time'] = match.group(3)
    elif re.search(r'Kernel Panic', previous_reboot_cause):
        match = re.search(r'Kernel Panic \[Time: (.*)\]', previous_reboot_cause)
        if match is not None:
            reboot_cause_dict['cause'] = "Kernel Panic"
            reboot_cause_dict['time'] = match.group(1)
    elif re.search(r'Heartbeat with the Supervisor card lost', previous_reboot_cause):
        reboot_cause_dict['cause'] = 'Heartbeat with the Supervisor card lost'

    return reboot_cause_dict

def determine_reboot_cause():
    # This variable is kept for future-use purpose. When proc_cmd_line/vendor/software provides
    # any additional_reboot_info it will be stored as a "comment" in REBOOT_CAUSE_HISTORY_FILE
    additional_reboot_info = "N/A"

    # 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
    proc_cmdline_reboot_cause = find_proc_cmdline_reboot_cause()

    # 2. Check if the previous reboot was caused by hardware
    #    If yes, the hardware reboot cause will be treated as the reboot cause
    hardware_reboot_cause = find_hardware_reboot_cause()

    # 3. If there is a REBOOT_CAUSE_FILE, it will contain any software-related
    #    reboot info. We will use it as the previous cause.
    software_reboot_cause = find_software_reboot_cause()

    # The main decision logic of the reboot cause:
    # If software reboot cause is not heartbeat loss and there is a valid hardware reboot cause indicated by platform API,
    #    check the software reboot cause to add additional reboot cause.
    # If there is a reboot cause indicated by /proc/cmdline, and/or warmreboot/fastreboot/softreboot
    #   the software_reboot_cause which is the content of /hosts/reboot-cause/reboot-cause.txt
    #   will be treated as the additional reboot cause
    # Elif there is a cmdline reboot cause,
    #   the software_reboot_cause will be treated as the reboot cause if it's not unknown
    #   otherwise, the cmdline_reboot_cause will be treated as the reboot cause if it's not none
    # Else the software_reboot_cause will be treated as the reboot cause
    if REBOOT_CAUSE_HEARTBEAT_LOSS not in software_reboot_cause and REBOOT_CAUSE_NON_HARDWARE not in hardware_reboot_cause:
        previous_reboot_cause = hardware_reboot_cause
        # Check if any software reboot was issued before this hardware reboot happened
        if software_reboot_cause is not REBOOT_CAUSE_UNKNOWN:
            additional_reboot_info = software_reboot_cause
        elif proc_cmdline_reboot_cause is not None:
            additional_reboot_info = proc_cmdline_reboot_cause
    elif proc_cmdline_reboot_cause is not None:
        if software_reboot_cause is not REBOOT_CAUSE_UNKNOWN:
            # Get the reboot cause from REBOOT_CAUSE_FILE
            previous_reboot_cause = software_reboot_cause
        else:
            previous_reboot_cause = proc_cmdline_reboot_cause
    else:
        previous_reboot_cause = software_reboot_cause

    return previous_reboot_cause, additional_reboot_info

def check_and_create_dpu_dirs():
    # Get the list of DPUs
    dpus = device_info.get_dpu_list()

    # Create directories for each DPU and its history
    for dpu in dpus:
        dpu_dir = os.path.join(REBOOT_CAUSE_MODULE_DIR, dpu)
        history_dir = os.path.join(dpu_dir, "history")

        # Create the DPU directory if it doesn't exist
        if not os.path.exists(dpu_dir):
            os.makedirs(dpu_dir)

        # Create reboot-cause.txt and write 'First boot' to it
        reboot_file = os.path.join(dpu_dir, 'reboot-cause.txt')
        with open(reboot_file, 'w') as f:
            f.write('First boot\n')

        # Create the history directory if it doesn't exist
        if not os.path.exists(history_dir):
            os.makedirs(history_dir)

def main():
    # Configure logger to log all messages INFO level and higher
    sonic_logger.set_min_log_priority(sonic_logger.DEFAULT_LOG_LEVEL)

    sonic_logger.log_info("Starting up...")

    if not os.geteuid() == 0:
        sonic_logger.log_error("User {} does not have permission to execute".format(pwd.getpwuid(os.getuid()).pw_name))
        sys.exit("This utility must be run as root")

    # Create REBOOT_CAUSE_DIR if it doesn't exist
    if not os.path.exists(REBOOT_CAUSE_DIR):
        os.makedirs(REBOOT_CAUSE_DIR)

    previous_reboot_cause, additional_reboot_info = determine_reboot_cause()

    # Current time
    reboot_cause_gen_time = str(datetime.datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S'))

    # Save the previous cause info into its history file as json format
    reboot_cause_dict = get_reboot_cause_dict(previous_reboot_cause, additional_reboot_info, reboot_cause_gen_time)

    # Create reboot-cause-#time#.json under history directory
    REBOOT_CAUSE_HISTORY_FILE = os.path.join(REBOOT_CAUSE_HISTORY_DIR, "reboot-cause-{}.json".format(reboot_cause_gen_time))

    # Create REBOOT_CAUSE_HISTORY_DIR if it doesn't exist
    if not os.path.exists(REBOOT_CAUSE_HISTORY_DIR):
        os.makedirs(REBOOT_CAUSE_HISTORY_DIR)

    # Write the previous reboot cause to REBOOT_CAUSE_HISTORY_FILE as a JSON format
    with open(REBOOT_CAUSE_HISTORY_FILE, "w") as reboot_cause_history_file:
        json.dump(reboot_cause_dict, reboot_cause_history_file)

    # Remove stale PREVIOUS_REBOOT_CAUSE_FILE if it exists
    if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE) or os.path.islink(PREVIOUS_REBOOT_CAUSE_FILE):
        os.remove(PREVIOUS_REBOOT_CAUSE_FILE)

    # Create a symbolic link to previous-reboot-cause.json file
    os.symlink(REBOOT_CAUSE_HISTORY_FILE, PREVIOUS_REBOOT_CAUSE_FILE)


    # Remove the old REBOOT_CAUSE_FILE
    if os.path.exists(REBOOT_CAUSE_FILE):
        os.remove(REBOOT_CAUSE_FILE)

    # Write a new default reboot cause file for the next reboot
    with open(REBOOT_CAUSE_FILE, "w") as cause_file:
        cause_file.write(REBOOT_CAUSE_UNKNOWN)

    # Create directories for DPUs in SmartSwitch platforms
    if device_info.is_smartswitch():
        check_and_create_dpu_dirs()


if __name__ == "__main__":
    main()
