# Copyright 2019,2021,2022 VMware, Inc.
# All rights reserved. -- VMware Confidential

"""ESX System Storage debugging script.

Collect and print out debugging information relative to the ESX System Storage
subsystem. The script can run standalone, or be invoked by vm-support.
"""
from collections import OrderedDict
from datetime import datetime, timedelta
import gzip
import json
from math import ceil
import os
import re
import sys
from traceback import format_exception

from borautils.threadpool import ThreadPool
from coredump import (autoCoredumpEnabled, getDiskDumpSlotSize,
                      iterateCoredumpLocations, calcNeededCoredumpSize)
from esxutils import (EsxBootOpts, getBuildId, getCln, getEsxVersion, getFQDN,
                      getHostIp, getSystemUUID, getVmkBootOptions, isEsxInAVm,
                      isEsxInstaller, runCli)
from systemStorage import *
from systemStorage.esxdisk import getStorageAdapters, iterDisks
from systemStorage.esxfs import getFssVolumes, getVolumeId, isNfsVolume
from uefi.uefivar import isUefi

# XXX: PR2407016,2416523
#
# The supported way to know whether a host is PXE-booted is to look for the boot
# NIC MAC on the vmkernel command line. However there are cases where the MAC is
# not populated, so instead we must assume PXE if no bootbank is found.
#
# When this flag is set, SystemSymlinksInfo treats an empty boot filesystem UUID
# as a PXE boot, and BootDeviceInfo does not log an error when neither a boot
# NIC nor a boot filesystem UUID is reported.
WORKAROUND_PR2407016 = True

# Placeholder stored in place of any value that could not be retrieved.
VALUE_UNKNOWN = "<unknown>"

def formatSize(sizeInBytes):
   """Render a size given in bytes as a short human-readable string.

   The first unit among GB, MB and KB that the size reaches is used, with one
   decimal digit. Sizes below 1024 bytes are printed as a whole number of
   bytes.
   """
   for suffix, unitSize in (("GB", GiB), ("MB", MiB), ("KB", 1024)):
      if sizeInBytes >= unitSize:
         return "%.1f%s" % (sizeInBytes / unitSize, suffix)

   return "%uB" % sizeInBytes

def printTable(rows, indent=0):
   """Print out rows of data as a table.

   First row in the table must provide the name of each column, prefixed with
   '<' (left-align), or '>' (right-align). The prefix is stripped from the
   printed header. Cells that are None are printed as empty strings.

   The rows passed in are never modified, so a header list can be reused
   across calls. An empty list of rows prints nothing.
   """
   if not rows:
      return

   header = rows[0]

   # Column widths are measured on the raw header cells (alignment marker
   # included), which keeps the layout identical to what this function has
   # always produced.
   colWidths = [0] * len(header)
   for row in rows:
      for i, item in enumerate(row):
         if item is not None:
            colWidths[i] = max(colWidths[i], len(str(item)))

   fmt = " " * indent
   for i, width in enumerate(colWidths):
      if i > 0:
         fmt += " "
      alignment = header[i][0]
      fmt += "{:%s%u}" % (alignment, width)

   # Work on a stripped copy of the header rather than rewriting rows[0].
   titles = [title[1:] for title in header]

   for row in [titles] + list(rows[1:]):
      # Widths were computed on str(cell), so format the same text. This also
      # supports objects that do not accept an alignment format spec.
      cells = ["" if c is None else str(c) for c in row]
      print(fmt.format(*cells))


class DebugInfo(OrderedDict):
   """A dict object to collect debug info.

   DebugInfo keys and values represent the state of a particular subsystem. For
   example, a DebugInfo object representing a disk would have one of its keys
   named 'numSectors', and the value for this key would be the number of sectors
   on the disk.

   Every DebugInfo object has a special 'errors' key which lists errors and
   warnings in the subsystem that it represents:

     DebugInfo['errors'] = {"errors": [<list of error records>],
                            "warnings": [<list of warning records>]}

   Error and warning records are formatted as follows:

     {"msg": "human readable error string",
      "traceback": <optional python traceback (or None)>}

     - OR -

     {"follow": "name of child subsystem"}

   Error records of type "follow" are used to chain errors in a hierarchy of
   DebugInfo objects. The following example shows how a low-level disk error can
   be "bubbled up" to be reported at the high-level filesystem layer:

     "Filesystem": {
        "errors": {"errors": [{"follow": "Disks"}]},
        "Disks": {
           "errors": {"errors": [{"follow": "mpx.vmhba0:C0:T0:L0"}]},
           "mpx.vmhba0:C0:T0:L0": {
              "errors": {"errors": [{"msg": "mpx.vmhba0:C0:T0:L0: I/O error"}]}
           }
        }
     }
   """

   def __init__(self, *args, **kwargs):
      """Create the 'errors' entry, then run scan() with the given arguments.

      Any exception escaping scan() is recorded as an error on this object
      instead of being propagated to the caller.
      """
      super().__init__()
      self['errors'] = {'errors': [], 'warnings': []}

      try:
         self.scan(*args, **kwargs)
      except Exception:
         # Use the runtime type: the bare __class__ name is bound to the class
         # this method is defined in, and would always read 'DebugInfo'.
         self.error("%s error while collecting debug information" %
                    type(self).__name__)

   def scan(self, *args, **kwargs):
      """Helper method to collect debug information. Subclasses may override
      this method.
      """
      return

   def error(self, msg, level='errors'):
      """Log an error for ulterior reporting.

      'level' selects the list the record is appended to ('errors' or
      'warnings'). When called while an exception is being handled, the
      formatted traceback of that exception is attached to the record.
      """
      err = OrderedDict()
      err['msg'] = msg

      excType, excVal, tb = sys.exc_info()
      if tb is None:
         err['traceback'] = None
      else:
         err['traceback'] = format_exception(excType, excVal, tb)

      self['errors'][level].append(err)

   def warning(self, msg):
      """Log a warning for ulterior reporting.
      """
      self.error(msg, level='warnings')

   def __setitem__(self, key, value):
      """Add a new (key, val) debug info entry.
      """
      super().__setitem__(key, value)

      # For values of type DebugInfo, link any error in the value object to this
      # parent DebugInfo object. Only one 'follow' record is added per child:
      # under 'errors' if the child has errors, otherwise under 'warnings' if it
      # has warnings. The link reflects the child's state at assignment time.
      if isinstance(value, DebugInfo):
         for level in ('errors', 'warnings'):
            if value['errors'][level]:
               self['errors'][level].append({'follow': key})
               break

   @property
   def hasErrors(self):
      """True if an error was detected.
      """
      return self['errors']['errors'] != []

   def printErrors(self, name, indent=0):
      """Print DebugInfo errors recursively.

      Nothing is printed when this object has neither errors nor warnings.
      Child subsystems referenced by 'follow' records are printed first, then
      this object's own messages and tracebacks.
      """
      errors = self['errors']['errors']
      warnings = self['errors']['warnings']
      if not (errors + warnings):
         return

      print(" " * indent + name)

      for error in errors + warnings:
         if 'follow' in error:
            subsystem = self[error['follow']]
            subsystem.printErrors(error['follow'], indent=(indent + 2))

      indentStr = " " * (indent + 2)

      for event, level in ((errors, 'ERROR'), (warnings, 'WARNING')):
         for error in event:
            if 'msg' in error:
               print(indentStr + "%s: %s" % (level, error['msg']))
            if error.get('traceback', None):
               print(indentStr + indentStr.join(error['traceback']))

   def toJson(self):
      """Serialize this object to a JSON string.

      datetime and timedelta values are serialized as their str() form; any
      other unsupported type raises TypeError, as with the stock JSON encoder.
      """
      class DebugJsonEncoder(json.JSONEncoder):
         def default(self, obj): # pylint: disable=E0202
            if isinstance(obj, (datetime, timedelta)):
               return str(obj)
            # super() is already bound: passing self again would raise an
            # unrelated "takes 2 positional arguments" TypeError.
            return super().default(obj)

      return json.dumps(self, cls=DebugJsonEncoder)


class SystemInfo(DebugInfo):
   """General system info.
   """

   def scan(self):
      """Collect general host properties.

      Each property is retrieved independently: on failure its value is set
      to VALUE_UNKNOWN and an error or a warning is logged, and the scan
      carries on with the next property.
      """
      # (key, getter, logging method, message logged on failure)
      probes = (
         ('ESX version', getEsxVersion, self.error,
          "failed to retrieve ESX version"),
         ('Changeset', getCln, self.warning,
          "failed to retrieve ESX CLN"),
         ('Build ID', getBuildId, self.warning,
          "failed to retrieve ESX build ID"),
         ('Host IP', getHostIp, self.error,
          "failed to retrieve host IP"),
         ('Hostname', getFQDN, self.warning,
          "failed to retrieve ESX hostname"),
         ('System UUID', getSystemUUID, self.error,
          "failed to retrieve system UUID"),
         ('Is nested', isEsxInAVm, self.error,
          "failed to check whether ESX is running in a VM"),
      )

      for key, getter, report, msg in probes:
         try:
            self[key] = getter()
         except Exception:
            self[key] = VALUE_UNKNOWN
            # Called from within the handler so the traceback gets recorded.
            report(msg)

      self['SystemStorageNext'] = IS_SYSTEM_STORAGE_NEXT_ENABLED
      if not self['SystemStorageNext']:
         self.error("SystemStorageNext feature is disabled")

   def printInfo(self):
      """Print the collected system properties.
      """
      print("System info:")
      print("  ESX version: %s" % self['ESX version'])
      print("  Changeset: %s" % self['Changeset'])
      print("  Build ID: %s" % self['Build ID'])
      print("  Nested ESX: %s" % self['Is nested'])
      # The parentheses matter: '%' binds tighter than the conditional
      # expression, so without them the disabled case printed just "dis".
      print("  SystemStorageNext FSS: %sabled" %
            ("en" if self['SystemStorageNext'] else "dis"))
      print("  System UUID: %s" % self['System UUID'])
      print("  Host IP: %s" % self['Host IP'])
      print("  Hostname: %s" % self['Hostname'])


class BootDeviceInfo(DebugInfo):
   """Collects info relative to the ESX boot device.
   """

   def scan(self):
      """Record the firmware type and the boot device.

      The boot device is either a PXE boot NIC ('PXE MAC' key) or a bootbank
      filesystem ('Bootbank' key). Neither key is set when the boot device
      cannot be identified from the CLI output.
      """
      try:
         # Testing the bare isUefi name is always true when it is a function,
         # which reported every host as UEFI. Call it when it is callable.
         # NOTE(review): isUefi is assumed to be a function taking no
         # argument -- confirm against uefi.uefivar.
         uefi = isUefi() if callable(isUefi) else isUefi
         self['firmware'] = 'UEFI' if uefi else 'BIOS'
      except Exception:
         self['firmware'] = VALUE_UNKNOWN
         self.error("failed to retrieve firmware information")

      try:
         bootDev = runCli(['system', 'boot', 'device', 'get'], evalOutput=True)
      except Exception:
         self.error("failed to retrieve boot device information")
         self['PXE MAC'] = self['Bootbank'] = VALUE_UNKNOWN
         return

      if bootDev['Boot NIC']:
         self['PXE MAC'] = bootDev['Boot NIC']
      elif bootDev['Stateless Boot NIC']:
         self['PXE MAC'] = bootDev['Stateless Boot NIC']
      elif bootDev['Boot Filesystem UUID']:
         self['Bootbank'] = bootDev['Boot Filesystem UUID']
      else:
         # With the workaround enabled, a missing boot device is not reported
         # as an error.
         if not WORKAROUND_PR2407016:
            self.error("failed to determine ESX boot device ('BOOTIF' / "
                       "'bootUUID'options missing on vmkernel command line)")

   def isPXE(self):
      """True if a 'PXE MAC' entry was recorded by scan().
      """
      return 'PXE MAC' in self

   def printInfo(self):
      """Print the firmware type and the boot device, if one was identified.
      """
      print("Boot device:")
      print("  Firmware: %s" % self['firmware'])

      if self.isPXE():
         print("  PXE boot MAC: %s" % self['PXE MAC'])
      elif 'Bootbank' in self:
         print("  Bootbank: %s" % self['Bootbank'])
      else:
         # CDROM
         pass


class BootOptionsInfo(DebugInfo):
   """Debug info holding the vmkernel boot options.
   """
   def scan(self):
      """Copy the vmkernel boot options into this object, logging an error if
      they cannot be retrieved.
      """
      try:
         bootOptions = getVmkBootOptions()
         self.update(bootOptions)
      except Exception:
         self.error("failed to get vmkernel boot options")

   def printInfo(self):
      """Print every entry whose value is a string.
      """
      print("ESX boot options:")
      # Entries holding anything but a string (such as the 'errors' record)
      # are left out.
      textEntries = [(name, value) for name, value in self.items()
                     if isinstance(value, str)]
      for name, value in textEntries:
         print("  %s: %s" % (name, value))


class TmpInfo(DebugInfo):
   """Collect info about the /tmp ramdisk.
   """

   def scan(self):
      """Record the size, current usage and peak usage of the /tmp ramdisk.

      The 'used', 'size' and 'peak used' keys are only set once the matching
      value has been retrieved; an error is logged when usage exceeds 90%.
      """
      try:
         ramdisks = runCli(['system', 'visorfs', 'ramdisk', 'list'],
                           evalOutput=True)
      except Exception:
         self.error("failed to retrieve ramdisk info")
         return

      for ramdisk in ramdisks:
         if ramdisk['Mount Point'] == '/tmp':
            tmp = ramdisk
            break
      else:
         self.error("/tmp: failed to retrieve ramdisk info")
         return

      self['used'] = 100 - tmp['Free']
      if self['used'] > 90:
         self.error("/tmp ramdisk is %u%% full" % self['used'])

      self['size'] = tmp['Maximum']
      if not self['size']:
         # Peak usage is a ratio of the size: avoid dividing by zero.
         self.error("/tmp: invalid ramdisk size (%s)" % self['size'])
         return

      self['peak used'] = ceil((tmp['Peak Used'] * 100) / self['size'])
      if self['peak used'] > 90:
         self.error('/tmp ramdisk peak usage is %u%%' % self['peak used'])

   def printInfo(self):
      """Print the ramdisk figures, showing VALUE_UNKNOWN for any value that
      scan() could not collect.
      """
      print("/tmp ramdisk:")

      if 'size' in self:
         print("  size: %s" % formatSize(self['size'] * 1024))
      else:
         print("  size: %s" % VALUE_UNKNOWN)

      if 'peak used' in self:
         print("  peak used: %u%%" % self['peak used'])
      else:
         print("  peak used: %s" % VALUE_UNKNOWN)

      if 'used' in self:
         print("  used: %s%%" % self['used'])
      else:
         print("  used: %s" % VALUE_UNKNOWN)


class Symlink(DebugInfo):
   """Collect information about a symbolic link.
   """

   # Upper bound on the number of links followed when walking a chain.
   _MAX_LINK_HOPS = 40

   def scan(self, path):
      """Inspect the symlink at 'path'.

      Sets the following keys:
        'src'    - the path given
        'dest'   - successive link targets, as returned by os.readlink()
        'exists' - whether 'path' itself exists (link not followed)
        'broken' - whether 'path' fails to resolve to an existing file; this
                   is also True when 'path' does not exist at all
      """
      self['src'] = path
      self['dest'] = []

      self['exists'] = os.path.lexists(path)
      self['broken'] = not os.path.exists(path)

      # A missing path is reported once, not also as a broken symlink.
      if not self['exists']:
         self.error("%s: no such file or directory" % self['src'])
      elif self['broken']:
         self.error("%s: broken symlink" % self['src'])

      # Stop on a link that was already visited (symlink cycle), or after too
      # many hops, rather than looping forever.
      visited = set()
      while (os.path.islink(path) and path not in visited and
             len(visited) < self._MAX_LINK_HOPS):
         visited.add(path)
         dest = os.readlink(path)
         self['dest'].append(dest)
         # A relative target is relative to the directory holding the link,
         # not to the current working directory. os.path.join() returns
         # 'dest' unchanged when it is absolute.
         path = os.path.join(os.path.dirname(path), dest)

   def printInfo(self, indent=0):
      """Print the link chain on one line, followed by its status if the link
      is missing or broken.
      """
      s = " " * indent + " -> ".join([self['src']] + self['dest'])
      if not self['exists']:
         s += ": no such symlink"
      elif self['broken']:
         s += ": broken symlink"
      print(s)


class SystemSymlinksInfo(DebugInfo):
   """Debug info for the System Storage API symlinks.
   """

   def scan(self):
      """Create one Symlink entry per System Storage symlink.

      Nothing is collected when running in the ESX Installer. The alternate
      bootbank link is only checked when the boot device could be queried and
      the host is found not to be PXE-booted.
      """
      if isEsxInstaller():
         # ESX Installer doesn't create SystemStorage symlinks.
         return

      links = [BOOTBANK_LINK, VARLOG_LINK, VARCORE_LINK, VARCACHE_LINK,
               SCRATCH_LINK, STORE_LINK]

      try:
         bootDev = runCli(['system', 'boot', 'device', 'get'], evalOutput=True)
      except Exception:
         self.error("failed to retrieve boot device information")
      else:
         if WORKAROUND_PR2407016:
            pxeBooted = bootDev['Boot Filesystem UUID'] == ""
         else:
            pxeBooted = not (bootDev['Boot NIC'] == "" and
                             bootDev['Stateless Boot NIC'] == "")

         # ALTBOOTBANK is never created when PXE-booting
         if not pxeBooted:
            links.append(ALTBOOTBANK_LINK)

      for link in links:
         self[link] = Symlink(link)

   def printInfo(self):
      """Print each collected symlink, one per line.
      """
      print("SystemStorage API symlinks:")
      # Only Symlink entries are printed; the 'errors' record is skipped.
      for entry in self.values():
         if not isinstance(entry, Symlink):
            continue
         entry.printInfo(indent=2)


class StorageAdaptersInfo(DebugInfo):
   """Collects info about the ESX storage adapters.
   """

   def scan(self):
      """Copy the storage adapters into this object.

      An error is logged when the adapters cannot be queried, or when the
      query succeeds but reports no adapter.
      """
      try:
         adapters = getStorageAdapters()
      except Exception:
         self.error("failed to query storage adapters information")
         # 'adapters' is unbound at this point: stop here rather than failing
         # on it and logging a second, unrelated error.
         return

      if adapters:
         self.update(adapters)
      else:
         self.error("no storage adapter found")

   def printInfo(self):
      """Print the adapters as a table, or a notice when there is none.
      """
      print("Storage adapters:")
      data = [[a['HBA Name'], a['Driver'], a['UID'], a['Link State'],
               a['Description']]
              for key, a in self.items() if key != 'errors']
      if data:
         header = ['<Name', '<Driver', '<UID', '<Link State', '<Description']
         printTable([header] + data, indent=2)
      else:
         print("  no storage adapter found")


class FssInfo(DebugInfo):
   """Debug info holding the FSS volumes.
   """

   def scan(self):
      """Store the FSS volumes in the 'volumes' attribute. The attribute is an
      empty list, and an error is logged, when they cannot be retrieved.
      """
      found = []
      try:
         found = getFssVolumes()
      except Exception:
         self.error("failed to retrieve ESX volumes")
      self.volumes = found


class DiskInfo(DebugInfo):
   """Debug info for one disk: its attributes and its partition table.
   """

   def scan(self, disk, volumes):
      """Record the attributes of 'disk', then scan its partitions.

      Every attribute key is first set to VALUE_UNKNOWN, so all keys exist
      even when reading the attributes fails part way through.
      """
      # (key, reader) pairs, evaluated in order inside a single try block.
      readers = (
         ('name', lambda: disk.name),
         ('vendor', lambda: disk.vendor.strip()),
         ('model', lambda: disk.model.strip()),
         ('size', lambda: formatSize(disk.sizeInMB * MiB)),
         ('type', lambda: disk.deviceType),
         ('local', lambda: disk.isLocal),
         ('removable', lambda: disk.isRemovable),
         ('sectorSize', lambda: formatSize(disk.sectorSize)),
         ('active boot disk', lambda: disk.isActiveBootDisk),
         ('vsan', lambda: disk.isVsanClaimed),
         ('vsanDiskGroup',
          lambda: disk.vsanGroupUuid if self['vsan'] else None),
      )

      for key, _ in readers:
         self[key] = VALUE_UNKNOWN

      try:
         for key, read in readers:
            self[key] = read()
      except Exception:
         self.error("%s: failed to get disk info" % disk.name)

      self.scanPartitions(disk, volumes)

   def scanPartitions(self, disk, volumes):
      """Record the partition table type and one entry per partition.

      Scanning stops, with an error logged, at the first step that fails. An
      empty disk leaves the 'partitions' entry empty.
      """
      self['partitions'] = OrderedDict()
      self['gpt'] = VALUE_UNKNOWN
      self['gpt backup healthy'] = VALUE_UNKNOWN

      try:
         disk.scanPartitions()
      except Exception:
         self.error("%s: failed to scan disk partitions" % disk.name)
         return

      try:
         isGpt = disk.isGpt
         self['gpt'] = isGpt
         if isGpt:
            self['gpt backup healthy'] = disk.isBackupGptHealthy()
      except Exception:
         self.error("%s: failed to retrieve MBR/GPT info" % disk.name)
         return

      if disk.isEmpty:
         return

      try:
         entries = list(disk._pt.iterPartitions())
      except Exception:
         self.error("%s: failed to iterate over disk partitions" % disk.name)
         return

      # Process partitions by increasing partition number.
      entries.sort(key=lambda entry: entry[0])

      for partNum, part in entries:
         assert partNum == part.num
         self._recordPartition(disk, part, volumes)

   def _recordPartition(self, disk, part, volumes):
      """Add the entry describing 'part' to the 'partitions' dict.
      """
      sizeInBytes = (part.end - part.start) * disk.sectorSize

      # First volume sitting on this partition; None when the partition is not
      # formatted or its volume is not mounted.
      volume = next((v for v in volumes
                     if v.diskName == disk.name and v.partNum == part.num),
                    None)

      if volume is None:
         volumeType = None
         volumeLabel = None
         volumePath = "Not mounted"
         used = None
      else:
         volumeType = volume.fsType
         volumeLabel = volume.label
         volumePath = volume.path
         if volume.fsSize == 0:
            self.warning("%s: invalid volume size (0 bytes)" % volume.name)
            used = None
         else:
            used = volume.fsPercentUsed

      # Key order matters: printInfo() prints the values as table columns.
      record = OrderedDict()
      record['num'] = part.num
      record['fsType'] = part.fsType
      record['label'] = part.label
      record['start'] = part.start
      record['end'] = part.end
      record['size'] = formatSize(sizeInBytes)
      record['volumeType'] = volumeType
      record['used'] = None if used is None else "%u%%" % used
      record['volumeLabel'] = volumeLabel
      record['volumePath'] = volumePath
      self['partitions'][part.num] = record

      if used is not None and used > 95:
         self.warning('%s: partition %u is %u%% full' %
                      (disk.name, part.num, used))

   def printInfo(self):
      """Print the disk attributes followed by its partition table.
      """
      summary = (self['name'], self['vendor'], self['model'], self['size'])
      print("  %s - %s %s (%s)" % summary)

      flags = (self['type'], self['local'], self['removable'],
               self['sectorSize'], self['active boot disk'])
      print("    type=%s local=%s removable=%s sectorSize=%s activeBoot=%s" %
            flags)

      if self['vsan']:
         print("    vsanDiskGroup=%s" % self['vsanDiskGroup'])

      if not self['partitions']:
         print(" " * 6 + "disk empty (no partition found)")
         return

      header = ['>Num', '<Type', '<Label', '>Start', '>End', '>Size',
                '<FS type', '>FS Usage', '<FS Label', '<Mount point']
      table = [header]
      for record in self['partitions'].values():
         table.append(list(record.values()))
      printTable(table, indent=6)


class DisksInfo(DebugInfo):
   """Collect information about ESX disks.
   """

   def _scanDisk(self, disk, volumes):
      """Scan one disk and file it under the VSAN or the ESX disks.
      """
      info = DiskInfo(disk, volumes)
      if disk.isVsanClaimed:
         self._vsanDisks[disk.name] = info
      else:
         self._hasEsxDisk = True
         self._esxDisks[disk.name] = info

   def scan(self, volumes):
      """Scan all disks in parallel.

      The 'esxDisks' and 'vsanDisks' entries are always set, so consumers can
      rely on them even when the storage devices could not be listed.
      """
      # Max number of disks being scanned in parallel. 10 is arbitrary picked as
      # a high enough value to speed things up by one order of magnitude, while
      # being low enough to avoid too much contention on the FS lock(s).
      MAX_DISK_SCANNING_THREADS = 10

      self._hasEsxDisk = False
      self._esxDisks = DebugInfo()
      self._vsanDisks = DebugInfo()

      try:
         allDisks = list(iterDisks())
      except Exception:
         self.error("failed to list storage devices")
         # Publish the (empty) collections before bailing out.
         self['esxDisks'] = self._esxDisks
         self['vsanDisks'] = self._vsanDisks
         return

      tPool = ThreadPool(MAX_DISK_SCANNING_THREADS)
      for disk in allDisks:
         tPool.addJob(self._scanDisk, disk, volumes)
      tPool.destroy(True)

      # Assigned only once scanning is over: errors of a child are linked to
      # its parent at assignment time.
      self['esxDisks'] = self._esxDisks
      self['vsanDisks'] = self._vsanDisks
      if not self._hasEsxDisk:
         self.warning("no ESX disk found")

   def dumpDiskInfo(self, vsan=False):
      """Print the ESX disks, or the VSAN disks when 'vsan' is True.
      """
      comp = 'vsan' if vsan else 'esx'
      print('%s disks:' % comp.upper())

      # A DebugInfo always holds an 'errors' entry, so it is never falsy:
      # look for actual disk entries instead.
      disks = self.get('%sDisks' % comp)
      diskInfos = []
      if disks is not None:
         diskInfos = [disk for key, disk in disks.items() if key != 'errors']

      if not diskInfos:
         print("  No %s disk found." % comp.upper())
         return

      # Disks are separated by one empty line.
      for i, disk in enumerate(diskInfos):
         if i > 0:
            print("")
         disk.printInfo()

   def printInfo(self):
      """Print the ESX disks, then the VSAN disks.
      """
      self.dumpDiskInfo(vsan=False)
      print("")
      self.dumpDiskInfo(vsan=True)


class OsdataInfo(Symlink):
   """Collect info about the OSDATA volume.
   """

   def _getOsdataPartition(self, esxDisks):
      """Return the partition record whose volume path is the OSDATA volume,
      or None if no scanned ESX disk holds it.

      Errors and warnings are logged depending on the kind of disk the
      partition sits on.
      """
      for name, disk in esxDisks.items():
         if name == 'errors':
            continue

         for part in disk['partitions'].values():
            if part['volumePath'] == self['volume']:
               if disk['type'] == 'usb':
                  self.error("%s: OSDATA on USB (wear-sensitive) disk" %
                             self['volume'])

               if disk['vsan'] or disk['vsanDiskGroup']:
                  self.error("%s: OSDATA on VSAN disk (illegal)" %
                             self['volume'])

               if disk['removable']:
                  self.warning("%s: OSDATA on removable disk (unsafe)" %
                               self['volume'])

               if not disk['local']:
                  self.warning("%s: OSDATA on remote disk (expect reduced "
                               "performance)" % self['volume'])
               return part
      return None

   def _getOsdataVolume(self, volumes):
      """Return the first volume whose path is a prefix of the OSDATA volume
      path. An error is logged, and None returned, if there is none.
      """
      for volume in volumes:
         if self['volume'].startswith(volume.path):
            return volume
      self.error('%s: no such OSDATA partition' % self['volume'])
      return None

   def scan(self, esxDisks, volumes, tmpInfo):
      """Inspect the OSDATA symlink and the volume it points to.

      On top of the Symlink keys, sets 'persistent', 'used' (a percentage, or
      VALUE_UNKNOWN) and, unless the link is broken, 'volume'. Nothing is
      collected when running in the ESX Installer.
      """
      if isEsxInstaller():
         # ESX Installer doesn't initialize OSDATA symlink.
         return

      super().scan(OSDATA_LINK)

      used = None

      if self['broken']:
         self['persistent'] = False
         self['used'] = VALUE_UNKNOWN
         self.error("%s: no OSDATA volume found" % self['src'])
         return

      self['volume'] = os.path.realpath(self['src'])
      if self['volume'].startswith("/tmp/"):
         self['persistent'] = False
         self.warning("%s: OSDATA is not persistent" % self['volume'])
         used = tmpInfo['used']
      else:
         self['persistent'] = True

         try:
            volumeId = getVolumeId(self['volume'])
         except ValueError as e:
            # Keep the name bound so the NFS check below can be skipped.
            volumeId = None
            self.error("%s: invalid OSDATA volume: %s" % (self['volume'], e))

         osdataPart = self._getOsdataPartition(esxDisks)
         if osdataPart is not None:
            if osdataPart['used'] is None:
               self.error("%s: failed to retrieve OSDATA usage" %
                          self['volume'])
            else:
               used = int(osdataPart['used'].strip('%'))
         else:
            if volumeId is not None and isNfsVolume(volumeId):
               self.warning("%s: OSDATA is on NFS (expect reduced performance)"
                            % self['volume'])

            volume = self._getOsdataVolume(volumes)
            if volume is not None:
               if volume.fsSize == 0:
                  self.error("%s: invalid OSDATA size (0 bytes)" %
                             self['volume'])
               else:
                  used = volume.fsPercentUsed

         if os.path.basename(self['volume']) == ".locker":
            self.warning("%s: OSDATA on user datastore" % self['volume'])

      if used is None:
         self['used'] = VALUE_UNKNOWN
      else:
         self['used'] = used
         if used > 95:
            self.error("%s: OSDATA partition is %u%% full" %
                       (self['volume'], used))

   def printInfo(self):
      """Print the OSDATA symlink, its persistence and its usage.

      Values that scan() did not collect are shown as VALUE_UNKNOWN.
      """
      print("OSDATA")

      if 'src' not in self:
         # scan() returned before inspecting the symlink.
         print("  OSDATA symlink not inspected")
         return

      super().printInfo(indent=2)
      print("  persistent: %s" % self.get('persistent', VALUE_UNKNOWN))

      used = self.get('used', VALUE_UNKNOWN)
      if used != VALUE_UNKNOWN:
         print("  usage: %u%%" % used)
      else:
         print("  usage: %s" % used)


class LockerInfo(Symlink):
   """Debug info for the LOCKER volume symlink.
   """

   def scan(self, esxDisks, tmpInfo):
      """Inspect the LOCKER symlink, logging an extra error when it does not
      resolve. 'esxDisks' and 'tmpInfo' are accepted but not used.
      """
      super().scan(LOCKER_LINK)
      if not self['broken']:
         return

      self.error("%s: no LOCKER volume found" % self['src'])

class EsxConfInfo(DebugInfo):
   """Collect the configuration values for the given esx.conf entries.
   """

   def scan(self, keys):
      """Read esx.conf and record the value of each entry listed in 'keys'.

      Lines are split on the first '='; key and value are stripped of
      surrounding whitespace. Entries not present in the file are left out of
      the 'conf' dict. An error is logged if the file cannot be read.
      """
      ESXDOTCONF = "/etc/vmware/esx.conf"
      self['path'] = ESXDOTCONF
      self['conf'] = OrderedDict()

      try:
         # The context manager makes sure the file is closed, including when
         # reading fails half way.
         with open(ESXDOTCONF, 'r') as confFile:
            for line in confFile:
               key, _, val = line.partition('=')
               key = key.strip()
               if key in keys:
                  self['conf'][key] = val.strip()
      except Exception:
         self.error("failed to read %s" % ESXDOTCONF)

   def printInfo(self, indent=0):
      """Print the file path followed by the recorded 'key = value' entries.
      """
      print(" " * indent + self['path'] + ":")
      indent += 2
      for key, val in self['conf'].items():
         print(" " * indent + "%s = %s" % (key, val))


class CoredumpInfo(DebugInfo):
   """Collect information relative to ESX coredump configuration.
   """

   def scan(self):
      """Collect the coredump options, the related esx.conf entries and the
      coredump locations.

      Each option is retrieved independently: on failure its value is set to
      VALUE_UNKNOWN and an error is logged.
      """
      # (key, getter, message logged on failure)
      probes = (
         ('autoCreateDumpFile', autoCoredumpEnabled,
          "failed to retrieve 'autoCreateDumpFile' config option"),
         ('diskDumpSlotSize', getDiskDumpSlotSize,
          "failed to retrieve 'diskDumpSlotSize' config option"),
         ('allowCoreDumpOnUsb',
          lambda: EsxBootOpts().getBool('allowCoreDumpOnUsb'),
          "failed to retrieve 'allowCoreDumpOnUsb' boot option"),
         ('recommended size', calcNeededCoredumpSize,
          "failed to calculate recommended coredump size"),
      )

      for key, getter, msg in probes:
         try:
            self[key] = getter()
         except Exception:
            self[key] = VALUE_UNKNOWN
            self.error(msg)

      esxConfKeys = ['/storage/diagPart/lun', '/storage/diagPart/partition',
                     '/storage/diagPart/file', '/storage/diagPart/vsanUid',
                     '/Misc/DiskDumpSlotSize']
      self['esxconf'] = EsxConfInfo(esxConfKeys)

      self['locations'] = []

      try:
         coredumpLocations = list(iterateCoredumpLocations())
      except Exception:
         self.error("failed to iterate over coredump locations")
         # Nothing to walk through; keeps the name bound for the loop below.
         coredumpLocations = []

      for core in coredumpLocations:
         try:
            self['locations'].append((core.type, core.configured, core.active,
                                      core.size, core.path))
         except Exception:
            self.error("encountered error while getting coredump location info")

   @staticmethod
   def _formatMB(value):
      """Format a size in MB, or return the value as text when it is not a
      number (e.g. VALUE_UNKNOWN).
      """
      if isinstance(value, (int, float)):
         return "%uMB" % value
      return str(value)

   def printInfo(self):
      """Print the coredump options, the esx.conf entries and the table of
      coredump locations.
      """
      print("Coredump config:")
      print("  autoCreateDumpFile: %s" %
            self.get('autoCreateDumpFile', VALUE_UNKNOWN))
      print("  diskDumpSlotSize: %s" %
            self._formatMB(self.get('diskDumpSlotSize', VALUE_UNKNOWN)))
      print("  allowCoreDumpOnUsb: %s" %
            self.get('allowCoreDumpOnUsb', VALUE_UNKNOWN))
      print("  recommended size: %s" %
            self._formatMB(self.get('recommended size', VALUE_UNKNOWN)))
      print("")
      if 'esxconf' in self:
         self['esxconf'].printInfo(indent=2)

      print("\n  Coredump locations:")
      locations = self.get('locations')
      if locations:
         header = ['<Type', '>Configured', '>Active', '>Size', '<Path']
         printTable([header] + locations, indent=4)
      else:
         print("    No coredump configured.")


class LogInfo(DebugInfo):
   """Base class extracting a section of a log file.

   Subclasses must set the '_startRe', '_successRe' and '_failureRe' compiled
   patterns before this class's constructor runs.
   """

   def _isStartLog(self, line):
      """True if 'line' matches the start pattern.
      """
      return bool(self._startRe.search(line))

   def _isEndLog(self, line):
      """True if 'line' matches the success or the failure pattern. A failure
      line is also logged as an error.
      """
      if self._successRe.search(line):
         return True

      if not self._failureRe.search(line):
         return False

      self.error("error log found in %s: %s" % (self['path'], line.strip()))
      return True

   def iterLogs(self, logFile):
      """Yield the lines of 'logFile' from the first start line onwards, and
      stop after yielding the first end line.
      """
      inSection = False
      for entry in logFile:
         if not self._isStartLog(entry):
            if self._isEndLog(entry):
               yield entry
               return
         else:
            inSection = True

         if inSection:
            yield entry

   def scan(self, path):
      """Store the extracted log section under the 'log' key.

      Files whose name ends with '.gz' are read through gzip. Failures to open
      or read the file are logged as errors; whatever was read before a
      failure is kept.
      """
      self['path'] = path
      self['log'] = ""

      isGzip = path.lower().endswith('.gz')
      opener, mode = (gzip.open, 'rt') if isGzip else (open, 'r')

      try:
         logFile = opener(path, mode=mode, errors='replace')
      except Exception as e:
         self.error("%s: failed to open ESX log: %s" % (path, e))
         return

      pieces = []
      try:
         for entry in self.iterLogs(logFile):
            pieces.append(entry)
      except Exception as e:
         self.error("%s: failed to read from ESX log: %s" % (path, e))
      finally:
         self['log'] = "".join(pieces)
         logFile.close()

   def printInfo(self):
      """Print the log path followed by the indented log section, or a notice
      when no log line was collected.
      """
      print("%s" % self['path'])

      if self['log'] == "":
         print("  <No log found (log may have rotated)>")
         return

      for entry in self['log'].split('\n'):
         print("  %s" % entry)


class JumpstartLogInfo(LogInfo):
   """Collect jumpstart logs for the system-storage plugin.
   """

   def __init__(self):
      # The patterns must exist before the base constructor runs the scan.
      self._pluginName = "system-storage"
      self._startRe = re.compile("Invoking method start on plugin %s" %
                                  self._pluginName)
      self._successRe = re.compile("Method start executed successfully for "
                                   "plugin %s" % self._pluginName)
      self._failureRe = re.compile("Plugin %s failed Invoking method start"
                                   % self._pluginName)
      super().__init__("/var/log/jumpstart-native-stdout.log")


   def _parseTimestamp(self, line):
      """Parse the timestamp at the start of a log line.

      The timestamp is the text preceding the first space or '|' character.
      Raises ValueError if it does not match the expected format.
      """
      JUMPSTART_TIMESTAMP_FMT = "%Y-%m-%dT%H:%M:%S.%fZ"
      # Raw string: '\|' is not a valid escape in a regular string literal.
      ts = re.split(r' |\|', line)[0]
      return datetime.strptime(ts, JUMPSTART_TIMESTAMP_FMT)

   def _isStartLog(self, line):
      """Same as the base class; also records the plugin start time.
      """
      match = super()._isStartLog(line)
      if match:
         self['start'] = self._parseTimestamp(line)
      return match

   def _isEndLog(self, line):
      """Same as the base class; also records the plugin end time and status
      (0 on success, 1 on failure).
      """
      match = super()._isEndLog(line)
      if match:
         self['end'] = self._parseTimestamp(line)
         self['status'] = 0 if self._successRe.search(line) is not None else 1
      return match

   def scan(self, path):
      """Extract the plugin logs, then validate the status and timestamps
      gathered along the way.

      'runtime' is only set when both the start and the end time were found.
      """
      self['plugin'] = self._pluginName
      self['start'] = None
      self['end'] = None
      self['runtime'] = None
      self['status'] = None

      super().scan(path)

      if self['status'] is None:
         self.error("failed to retrieve jumpstart-%s exit status" %
                    self['plugin'])
      elif self['status'] != 0:
         self.error("jumpstart-%s failed to execute (see details in %s)" %
                    (self['plugin'], self['path']))

      if self['start'] is None:
         self.error("failed to retrieve jumpstart-%s start time" %
                    self['plugin'])

      if self['end'] is None:
         self.error("failed to retrieve jumpstart-%s end time" % self['plugin'])

      if self['start'] is not None and self['end'] is not None:
         if self['start'] > self['end']:
            self.error("invalid jumpstart-%s start/end times "
                       "(start=%s > end=%s)" %
                       (self['plugin'], self['start'], self['end']))

         self['runtime'] = self['end'] - self['start']

   def printInfo(self):
      """Print the plugin status and runtime, followed by the log section.
      """
      print("Jumpstart:")
      print("  plugin: %s" % self['plugin'])
      if self['status'] == 0:
         status = "success"
      elif self['status'] == 1:
         status = "failure"
      else:
         status = "unknown"
      print("  Status: %s" % status)
      print("  Runtime: %s" % str(self['runtime']))

      print("")
      super().printInfo()

class BootGzLogInfo(LogInfo):
   """Debug info extracting the system-storage plugin section from the
   /var/log/boot.gz log.
   """

   def __init__(self):
      plugin = "system-storage"
      # Log markers looked for by the base class; they must be set before its
      # constructor runs.
      markers = (
         ('_startRe', "Activating Jumpstart plugin %s." % plugin),
         ('_successRe', "Jumpstart plugin %s activated." % plugin),
         ('_failureRe', "Jumpstart plugin %s activation failed:" % plugin),
      )
      for attrName, pattern in markers:
         setattr(self, attrName, re.compile(pattern))

      super().__init__("/var/log/boot.gz")


class SyslogInfo(LogInfo):
   """Debug info extracting the system-storage plugin section from the
   /var/log/syslog.log log.
   """

   def __init__(self):
      plugin = "system-storage"
      # Log markers looked for by the base class; they must be set before its
      # constructor runs.
      markers = (
         ('_startRe', "executing start plugin: %s" % plugin),
         ('_successRe',
          "Method start executed successfully for plugin %s" % plugin),
         ('_failureRe',
          "Plugin '%s' failed Invoking method 'start'" % plugin),
      )
      for attrName, pattern in markers:
         setattr(self, attrName, re.compile(pattern))

      super().__init__("/var/log/syslog.log")


class SystemStorageInfo(DebugInfo):
   """Collect info about the SystemStorage layer.
   """

   def scan(self):
      """Collect the debug info of every subsystem, in reporting order.
      """
      self['generalSystemInfo'] = SystemInfo()
      self['bootDevInfo'] = BootDeviceInfo()
      self['esxBootOptions'] = BootOptionsInfo()
      self['Tmp'] = TmpInfo()
      self['systemSymlinks'] = SystemSymlinksInfo()
      self['storageAdapters'] = StorageAdaptersInfo()
      fss = FssInfo()
      self['fsSwitch'] = fss
      disks = DisksInfo(fss.volumes)
      self['disks'] = disks
      # 'esxDisks' may be missing if the storage devices could not be listed:
      # fall back to an empty collection so that OSDATA is still inspected.
      self['osdata'] = OsdataInfo(disks.get('esxDisks', DebugInfo()),
                                  fss.volumes, self['Tmp'])
      self['coredump'] = CoredumpInfo()
      self['jumpstartLog'] = JumpstartLogInfo()
      self['syslog'] = SyslogInfo()
      self['vmkernelLog'] = BootGzLogInfo()

   def printInfo(self):
      """Print the report of each subsystem, then the error summary.

      A subsystem that fails to print is logged as an error and skipped, so
      the remaining subsystems and the error summary are still reported.
      """
      subsystems = ('generalSystemInfo', 'bootDevInfo', 'esxBootOptions',
                    'Tmp', 'systemSymlinks', 'storageAdapters', 'disks',
                    'osdata', 'coredump', 'jumpstartLog', 'syslog',
                    'vmkernelLog')

      for key in subsystems:
         try:
            self[key].printInfo()
         except Exception:
            # Logged from within the handler so the traceback gets recorded.
            self.error("%s: failed to print debug information" % key)
            print("%s: failed to print debug information" % key)
         print('')

      self.printErrors('SystemStorage')


#
# Entry point
#
if __name__ == "__main__":
   import argparse

   # Command line: a single optional --json switch selecting the output format.
   cliParser = argparse.ArgumentParser(
      description='ESX System Storage debugging script.')
   cliParser.add_argument('--json', dest='json', action='store_true')
   cliArgs = cliParser.parse_args()

   # Creating the object runs the whole collection.
   report = SystemStorageInfo()

   if not cliArgs.json:
      report.printInfo()
   else:
      print(report.toJson())
