html

<table border="1"><thead><tr><th>body</th><th>header</th></tr></thead><tbody><tr><td><table border="1"><tr><th>alert</th><td><table border="1"><tr><th>content</th><td>The health test result for NAME_NODE_RPC_LATENCY has become bad: The moving average of the RPC latency is 14.8 second(s) over the previous 5 minute(s). The moving average of the queue time is 197 millisecond(s). The moving average of the processing time is 14.6 second(s). Critical threshold: 5 second(s).</td></tr><tr><th>attributes</th><td><table border="1"><tr><th>__persist_timestamp</th><td><ul><li>1555551632148</li></ul></td></tr><tr><th>HOST_IDS</th><td><ul><li>28c66578-60fe-427c-8806-5adc75270062</li></ul></td></tr><tr><th>EVENTCODE</th><td><ul><li>EV_ROLE_HEALTH_CHECK_BAD</li><li>EV_ROLE_HEALTH_CHECK_CONCERNING</li><li>EV_ROLE_HEALTH_CHECK_GOOD</li></ul></td></tr><tr><th>ALERT_SUPPRESSED</th><td><ul><li>false</li></ul></td></tr><tr><th>HEALTH_TEST_NAME</th><td><ul><li>NAME_NODE_RPC_LATENCY</li></ul></td></tr><tr><th>MONITOR_STARTUP</th><td><ul><li>false</li></ul></td></tr><tr><th>ROLE_TYPE</th><td><ul><li>NAMENODE</li></ul></td></tr><tr><th>BAD_TEST_RESULTS</th><td><ul><li>1</li></ul></td></tr><tr><th>__uuid</th><td><ul><li>a33c1b09-9161-4813-a76e-70233b68c402</li></ul></td></tr><tr><th>PREVIOUS_HEALTH_SUMMARY</th><td><ul><li>YELLOW</li></ul></td></tr><tr><th>CURRENT_HEALTH_SUMMARY</th><td><ul><li>RED</li></ul></td></tr><tr><th>SERVICE_DISPLAY_NAME</th><td><ul><li>HDFS</li></ul></td></tr><tr><th>CLUSTER_ID</th><td><ul><li>17</li></ul></td></tr><tr><th>ALERT_SUMMARY</th><td><ul><li>The health of role NameNode (yqbhd0003) has become bad.</li></ul></td></tr><tr><th>CLUSTER_DISPLAY_NAME</th><td><ul><li>Cluster 1</li></ul></td></tr><tr><th>HOSTS</th><td><ul><li>yqbhd0003.prd.wgq</li></ul></td></tr><tr><th>ROLE_DISPLAY_NAME</th><td><ul><li>NameNode (yqbhd0003)</li></ul></td></tr><tr><th>SERVICE_TYPE</th><td><ul><li>HDFS</li></ul></td></tr><tr><th>HEALTH_TEST_RESULTS</th><td><table border="1"><thead><tr><th>content</th><th>suppressed</th><th>testName</th><th>eventCode</th><th>severity</th></tr></thead><tbody><tr><td>The health test result for NAME_NODE_RPC_LATENCY has become bad: The moving average of the RPC latency is 14.8 second(s) over the previous 5 minute(s). The moving average of the queue time is 197 millisecond(s). The moving average of the processing time is 14.6 second(s). Critical threshold: 5 second(s).</td><td>False</td><td>NAME_NODE_RPC_LATENCY</td><td>EV_ROLE_HEALTH_CHECK_BAD</td><td>CRITICAL</td></tr></tbody></table></td></tr><tr><th>SEVERITY</th><td><ul><li>CRITICAL</li></ul></td></tr><tr><th>CATEGORY</th><td><ul><li>HEALTH_CHECK</li></ul></td></tr><tr><th>CURRENT_COMPLETE_HEALTH_TEST_RESULTS</th><td><ul><li>{"content":"The health test result for NAME_NODE_RPC_LATENCY has become bad: The moving average of the RPC latency is 14.8 second(s) over the previous 5 minute(s). The moving average of the queue time is 197 millisecond(s). The moving average of the processing time is 14.6 second(s). Critical threshold: 5 second(s).","testName":"NAME_NODE_RPC_LATENCY","eventCode":"EV_ROLE_HEALTH_CHECK_BAD","severity":"CRITICAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_SWAP_MEMORY_USAGE has become concerning: 40.2 MiB of swap memory is being used by this role's process. Warning threshold: any.","testName":"NAME_NODE_SWAP_MEMORY_USAGE","eventCode":"EV_ROLE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_DIRECTORY_FAILURES has become good: 2 active status directories: /data02/dfs/nn, /data03/dfs/nn.","testName":"NAME_NODE_DIRECTORY_FAILURES","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_SAFE_MODE has become good: This NameNode is not in safe mode.","testName":"NAME_NODE_SAFE_MODE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_JOURNAL_NODE_SYNC_STATUS has become good: JournalNodes in sync: yqbhd0002.prd.wgq, yqbhd0003.prd.wgq, yqbhd0004.prd.wgq, yqbhd0005.prd.wgq, yqbhd0006.prd.wgq.","testName":"NAME_NODE_JOURNAL_NODE_SYNC_STATUS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_UPGRADE_STATUS has become good: This NameNode does not have an unfinalized metadata upgrade.","testName":"NAME_NODE_UPGRADE_STATUS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_ROLLING_UPGRADE_STATUS has become good: This NameNode does not have an unfinalized rolling upgrade.","testName":"NAME_NODE_ROLLING_UPGRADE_STATUS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_HA_CHECKPOINT_AGE has become good: The filesystem checkpoint is 3 minute(s), 34 second(s) old. This is 5.94% of the configured checkpoint period of 1 hour(s). 429,522 transactions have occurred since the last filesystem checkpoint. This is 42.95% of the configured checkpoint transaction target of 1,000,000.","testName":"NAME_NODE_HA_CHECKPOINT_AGE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_SCM_HEALTH has become good: This role's status is as expected. The role is started.","testName":"NAME_NODE_SCM_HEALTH","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_UNEXPECTED_EXITS has become good: This role encountered 0 unexpected exit(s) in the previous 5 minute(s).","testName":"NAME_NODE_UNEXPECTED_EXITS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_FILE_DESCRIPTOR has become good: Open file descriptors: 1,759. File descriptor limit: 32,768. Percentage in use: 5.37%.","testName":"NAME_NODE_FILE_DESCRIPTOR","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_LOG_DIRECTORY_FREE_SPACE has become good: This role's Log Directory (/var/log/hadoop-hdfs) is on a filesystem with more than 10.0 GiB of its space free.","testName":"NAME_NODE_LOG_DIRECTORY_FREE_SPACE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_HEAP_DUMP_DIRECTORY_FREE_SPACE has become good: This role's Heap Dump Directory (/tmp) is on a filesystem with more than 10.0 GiB of its space free.","testName":"NAME_NODE_HEAP_DUMP_DIRECTORY_FREE_SPACE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_HOST_HEALTH has become good: The health of this role's host is good.","testName":"NAME_NODE_HOST_HEALTH","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_WEB_METRIC_COLLECTION has become good: The web server of this role is responding with metrics. The most recent collection took 111 millisecond(s).","testName":"NAME_NODE_WEB_METRIC_COLLECTION","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_PAUSE_DURATION has become good: Average time spent paused was 47 millisecond(s) (0.08%) per minute over the previous 5 minute(s).","testName":"NAME_NODE_PAUSE_DURATION","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_DATA_DIRECTORIES_FREE_SPACE has become good: This role's NameNode Data Directories (/data02/dfs/nn, /data03/dfs/nn) are on a filesystem with more than 10.0 GiB of its space free.","testName":"NAME_NODE_DATA_DIRECTORIES_FREE_SPACE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li></ul></td></tr><tr><th>PREVIOUS_COMPLETE_HEALTH_TEST_RESULTS</th><td><ul><li>{"content":"The health test result for NAME_NODE_RPC_LATENCY has become concerning: The moving average of the RPC latency is 3.3 second(s) over the previous 5 minute(s). The moving average of the queue time is 44 millisecond(s). The moving average of the processing time is 3.3 second(s). Warning threshold: 1 second(s).","testName":"NAME_NODE_RPC_LATENCY","eventCode":"EV_ROLE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_SWAP_MEMORY_USAGE has become concerning: 40.2 MiB of swap memory is being used by this role's process. Warning threshold: any.","testName":"NAME_NODE_SWAP_MEMORY_USAGE","eventCode":"EV_ROLE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_DIRECTORY_FAILURES has become good: 2 active status directories: /data02/dfs/nn, /data03/dfs/nn.","testName":"NAME_NODE_DIRECTORY_FAILURES","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_SAFE_MODE has become good: This NameNode is not in safe mode.","testName":"NAME_NODE_SAFE_MODE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_JOURNAL_NODE_SYNC_STATUS has become good: JournalNodes in sync: yqbhd0002.prd.wgq, yqbhd0003.prd.wgq, yqbhd0004.prd.wgq, yqbhd0005.prd.wgq, yqbhd0006.prd.wgq.","testName":"NAME_NODE_JOURNAL_NODE_SYNC_STATUS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_UPGRADE_STATUS has become good: This NameNode does not have an unfinalized metadata upgrade.","testName":"NAME_NODE_UPGRADE_STATUS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_ROLLING_UPGRADE_STATUS has become good: This NameNode does not have an unfinalized rolling upgrade.","testName":"NAME_NODE_ROLLING_UPGRADE_STATUS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_HA_CHECKPOINT_AGE has become good: The filesystem checkpoint is 3 minute(s), 34 second(s) old. This is 5.94% of the configured checkpoint period of 1 hour(s). 424,718 transactions have occurred since the last filesystem checkpoint. This is 42.47% of the configured checkpoint transaction target of 1,000,000.","testName":"NAME_NODE_HA_CHECKPOINT_AGE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_SCM_HEALTH has become good: This role's status is as expected. The role is started.","testName":"NAME_NODE_SCM_HEALTH","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_UNEXPECTED_EXITS has become good: This role encountered 0 unexpected exit(s) in the previous 5 minute(s).","testName":"NAME_NODE_UNEXPECTED_EXITS","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_FILE_DESCRIPTOR has become good: Open file descriptors: 1,704. File descriptor limit: 32,768. Percentage in use: 5.20%.","testName":"NAME_NODE_FILE_DESCRIPTOR","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_LOG_DIRECTORY_FREE_SPACE has become good: This role's Log Directory (/var/log/hadoop-hdfs) is on a filesystem with more than 10.0 GiB of its space free.","testName":"NAME_NODE_LOG_DIRECTORY_FREE_SPACE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_HEAP_DUMP_DIRECTORY_FREE_SPACE has become good: This role's Heap Dump Directory (/tmp) is on a filesystem with more than 10.0 GiB of its space free.","testName":"NAME_NODE_HEAP_DUMP_DIRECTORY_FREE_SPACE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_HOST_HEALTH has become good: The health of this role's host is good.","testName":"NAME_NODE_HOST_HEALTH","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_WEB_METRIC_COLLECTION has become good: The web server of this role is responding with metrics. The most recent collection took 140 millisecond(s).","testName":"NAME_NODE_WEB_METRIC_COLLECTION","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_PAUSE_DURATION has become good: Average time spent paused was 26 millisecond(s) (0.04%) per minute over the previous 5 minute(s).","testName":"NAME_NODE_PAUSE_DURATION","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for NAME_NODE_DATA_DIRECTORIES_FREE_SPACE has become good: This role's NameNode Data Directories (/data02/dfs/nn, /data03/dfs/nn) are on a filesystem with more than 10.0 GiB of its space free.","testName":"NAME_NODE_DATA_DIRECTORIES_FREE_SPACE","eventCode":"EV_ROLE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li></ul></td></tr><tr><th>ALERT</th><td><ul><li>true</li></ul></td></tr><tr><th>CLUSTER</th><td><ul><li>cluster</li></ul></td></tr><tr><th>SERVICE</th><td><ul><li>hdfs</li></ul></td></tr><tr><th>ROLE</th><td><ul><li>hdfs-NAMENODE-0fb83a5af06aceda10fa4efaf42d4316</li></ul></td></tr></table></td></tr><tr><th>source</th><td></td></tr><tr><th>timestamp</th><td><table border="1"><tr><th>epochMs</th><td>1555551631597</td></tr><tr><th>iso8601</th><td>2019-04-18T01:40:31Z</td></tr></table></td></tr></table></td></tr></table></td><td><table border="1"><tr><th>version</th><td>2</td></tr><tr><th>type</th><td>alert</td></tr></table></td></tr><tr><td><table border="1"><tr><th>alert</th><td><table border="1"><tr><th>content</th><td>The health test result for HDFS_HA_NAMENODE_HEALTH has become bad: NameNode summary: yqbhd0003.prd.wgq (Availability: Active, Health: Bad), yqbhd0002.prd.wgq (Availability: Standby, Health: Concerning). This health test reflects the health of the active NameNode.</td></tr><tr><th>attributes</th><td><table border="1"><tr><th>__persist_timestamp</th><td><ul><li>1555551632150</li></ul></td></tr><tr><th>EVENTCODE</th><td><ul><li>EV_SERVICE_HEALTH_CHECK_BAD</li><li>EV_SERVICE_HEALTH_CHECK_CONCERNING</li><li>EV_SERVICE_HEALTH_CHECK_GOOD</li></ul></td></tr><tr><th>NAMESERVICE</th><td><ul><li>yqb</li></ul></td></tr><tr><th>ALERT_SUPPRESSED</th><td><ul><li>false</li></ul></td></tr><tr><th>HEALTH_TEST_NAME</th><td><ul><li>HDFS_HA_NAMENODE_HEALTH</li></ul></td></tr><tr><th>MONITOR_STARTUP</th><td><ul><li>false</li></ul></td></tr><tr><th>BAD_TEST_RESULTS</th><td><ul><li>1</li></ul></td></tr><tr><th>__uuid</th><td><ul><li>ee8ade39-4d10-4d6a-a9b2-c71733099901</li></ul></td></tr><tr><th>PREVIOUS_HEALTH_SUMMARY</th><td><ul><li>YELLOW</li></ul></td></tr><tr><th>CURRENT_HEALTH_SUMMARY</th><td><ul><li>RED</li></ul></td></tr><tr><th>SERVICE_DISPLAY_NAME</th><td><ul><li>HDFS</li></ul></td></tr><tr><th>ALERT_SUMMARY</th><td><ul><li>The health of service hdfs has become bad.</li></ul></td></tr><tr><th>CLUSTER_DISPLAY_NAME</th><td><ul><li>Cluster 1</li></ul></td></tr><tr><th>SERVICE_TYPE</th><td><ul><li>HDFS</li></ul></td></tr><tr><th>HEALTH_TEST_RESULTS</th><td><table border="1"><thead><tr><th>content</th><th>suppressed</th><th>testName</th><th>eventCode</th><th>severity</th></tr></thead><tbody><tr><td>The health test result for HDFS_HA_NAMENODE_HEALTH has become bad: NameNode summary: yqbhd0003.prd.wgq (Availability: Active, Health: Bad), yqbhd0002.prd.wgq (Availability: Standby, Health: Concerning). This health test reflects the health of the active NameNode.</td><td>False</td><td>HDFS_HA_NAMENODE_HEALTH</td><td>EV_SERVICE_HEALTH_CHECK_BAD</td><td>CRITICAL</td></tr></tbody></table></td></tr><tr><th>SEVERITY</th><td><ul><li>CRITICAL</li></ul></td></tr><tr><th>CATEGORY</th><td><ul><li>HEALTH_CHECK</li></ul></td></tr><tr><th>CURRENT_COMPLETE_HEALTH_TEST_RESULTS</th><td><ul><li>{"content":"The health test result for HDFS_HA_NAMENODE_HEALTH has become bad: NameNode summary: yqbhd0003.prd.wgq (Availability: Active, Health: Bad), yqbhd0002.prd.wgq (Availability: Standby, Health: Concerning). This health test reflects the health of the active NameNode.","testName":"HDFS_HA_NAMENODE_HEALTH","eventCode":"EV_SERVICE_HEALTH_CHECK_BAD","severity":"CRITICAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_DATA_NODES_HEALTHY has become concerning: Healthy DataNode: 27. Concerning DataNode: 289. Total DataNode: 316. Percent healthy: 8.54%. Percent healthy or concerning: 100.00%. Warning threshold: 95.00%.","testName":"HDFS_DATA_NODES_HEALTHY","eventCode":"EV_SERVICE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for HDFS_FAILOVER_CONTROLLERS_HEALTHY has become concerning: Failover Controllers with concerning health: yqbhd0003.prd.wgq, yqbhd0002.prd.wgq. The following health tests are concerning: swap memory usage. The following health tests are concerning: swap memory usage.","testName":"HDFS_FAILOVER_CONTROLLERS_HEALTHY","eventCode":"EV_SERVICE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for HDFS_FREE_SPACE_REMAINING has become good: Space free in the cluster: 2.8 PiB. Capacity of the cluster: 11.7 PiB. Percentage of capacity free: 23.81%.","testName":"HDFS_FREE_SPACE_REMAINING","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_BLOCKS_WITH_CORRUPT_REPLICAS has become good: 0 blocks with corrupt replicas in the cluster. 110,873,437 total blocks in the cluster. Percentage blocks with corrupt replicas: 0.00%.","testName":"HDFS_BLOCKS_WITH_CORRUPT_REPLICAS","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_MISSING_BLOCKS has become good: 0 missing blocks in the cluster. 110,873,437 total blocks in the cluster. Percentage missing blocks: 0.00%. This health test is currently suppressed.","testName":"HDFS_MISSING_BLOCKS","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":true}</li><li>{"content":"The health test result for HDFS_UNDER_REPLICATED_BLOCKS has become good: 0 under replicated blocks in the cluster. 110,873,437 total blocks in the cluster. Percentage under replicated blocks: 0.00%.","testName":"HDFS_UNDER_REPLICATED_BLOCKS","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_CANARY_HEALTH has become good: Canary test of file create, write, read and delete operations succeeded.","testName":"HDFS_CANARY_HEALTH","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li></ul></td></tr><tr><th>PREVIOUS_COMPLETE_HEALTH_TEST_RESULTS</th><td><ul><li>{"content":"The health test result for HDFS_HA_NAMENODE_HEALTH has become concerning: NameNode summary: yqbhd0003.prd.wgq (Availability: Active, Health: Concerning), yqbhd0002.prd.wgq (Availability: Standby, Health: Concerning). This health test reflects the health of the active NameNode.","testName":"HDFS_HA_NAMENODE_HEALTH","eventCode":"EV_SERVICE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for HDFS_DATA_NODES_HEALTHY has become concerning: Healthy DataNode: 27. Concerning DataNode: 289. Total DataNode: 316. Percent healthy: 8.54%. Percent healthy or concerning: 100.00%. Warning threshold: 95.00%.","testName":"HDFS_DATA_NODES_HEALTHY","eventCode":"EV_SERVICE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for HDFS_FAILOVER_CONTROLLERS_HEALTHY has become concerning: Failover Controllers with concerning health: yqbhd0003.prd.wgq, yqbhd0002.prd.wgq. The following health tests are concerning: swap memory usage. The following health tests are concerning: swap memory usage.","testName":"HDFS_FAILOVER_CONTROLLERS_HEALTHY","eventCode":"EV_SERVICE_HEALTH_CHECK_CONCERNING","severity":"IMPORTANT","suppressed":false}</li><li>{"content":"The health test result for HDFS_FREE_SPACE_REMAINING has become good: Space free in the cluster: 2.8 PiB. Capacity of the cluster: 11.7 PiB. Percentage of capacity free: 23.81%.","testName":"HDFS_FREE_SPACE_REMAINING","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_BLOCKS_WITH_CORRUPT_REPLICAS has become good: 0 blocks with corrupt replicas in the cluster. 110,873,437 total blocks in the cluster. Percentage blocks with corrupt replicas: 0.00%.","testName":"HDFS_BLOCKS_WITH_CORRUPT_REPLICAS","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_MISSING_BLOCKS has become good: 0 missing blocks in the cluster. 110,873,437 total blocks in the cluster. Percentage missing blocks: 0.00%. This health test is currently suppressed.","testName":"HDFS_MISSING_BLOCKS","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":true}</li><li>{"content":"The health test result for HDFS_UNDER_REPLICATED_BLOCKS has become good: 0 under replicated blocks in the cluster. 110,873,437 total blocks in the cluster. Percentage under replicated blocks: 0.00%.","testName":"HDFS_UNDER_REPLICATED_BLOCKS","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li><li>{"content":"The health test result for HDFS_CANARY_HEALTH has become good: Canary test of file create, write, read and delete operations succeeded.","testName":"HDFS_CANARY_HEALTH","eventCode":"EV_SERVICE_HEALTH_CHECK_GOOD","severity":"INFORMATIONAL","suppressed":false}</li></ul></td></tr><tr><th>ALERT</th><td><ul><li>true</li></ul></td></tr><tr><th>CLUSTER</th><td><ul><li>cluster</li></ul></td></tr><tr><th>SERVICE</th><td><ul><li>hdfs</li></ul></td></tr><tr><th>CLUSTER_ID</th><td><ul><li>17</li></ul></td></tr></table></td></tr><tr><th>source</th><td></td></tr><tr><th>timestamp</th><td><table border="1"><tr><th>epochMs</th><td>1555551631597</td></tr><tr><th>iso8601</th><td>2019-04-18T01:40:31Z</td></tr></table></td></tr></table></td></tr></table></td><td><table border="1"><tr><th>version</th><td>2</td></tr><tr><th>type</th><td>alert</td></tr></table></td></tr></tbody></table>