/*
Amazon SQL Interview Question

Identify continuous periods of a specific service status (down) from the service_log table, 
where the downtime lasts for 5 or more consecutive records, and summarize the start and end times for these periods.
*/

-- Schema Setup
CREATE TABLE service_log (
 service_name VARCHAR(50),
 updated_time DATETIME,
 status VARCHAR(10)
);

INSERT INTO service_log (service_name, updated_time, status)
VALUES
('hdfs', '2024-03-06 10:00:00', 'up'),
('hdfs', '2024-03-06 10:01:00', 'up'),
('hdfs', '2024-03-06 10:02:00', 'down'),
('hdfs', '2024-03-06 10:03:00', 'down'),
('hdfs', '2024-03-06 10:04:00', 'down'),
('hdfs', '2024-03-06 10:05:00', 'down'),
('hdfs', '2024-03-06 10:06:00', 'down'),
('hdfs', '2024-03-06 10:07:00', 'up'),
('hdfs', '2024-03-06 10:08:00', 'up'),
('hdfs', '2024-03-06 10:09:00', 'down'),
('hdfs', '2024-03-06 10:10:00', 'down');

-- Solution
WITH down_service_log AS
(
 SELECT
    *,
    MINUTE(updated_time) - DENSE_RANK() OVER(PARTITION BY status ORDER BY MINUTE(updated_time)) AS diff
 FROM service_log
 WHERE status = 'down'
)

SELECT
    service_name,
    status,
    MIN(updated_time) AS start_time,
    MAX(updated_time) AS end_time
FROM down_service_log
GROUP BY service_name,status,diff
HAVING COUNT(*) >= 5;

Embed on website

To embed this program on your website, copy the following code and paste it into your website's HTML: