/*
Uber SQL Interview Question

Some forecasting methods are extremely simple and surprisingly effective. Naïve forecast is one of them.
To create a naïve forecast for "distance per dollar" (defined as distance_to_travel/monetary_cost), first sum the "distance to travel" and "monetary cost" values monthly. This gives the actual value for the current month.
For the forecasted value, use the previous month's value.
After obtaining both actual and forecasted values, calculate the root mean squared error (RMSE) using the formula RMSE = sqrt(mean(square(actual - forecast))). Report the RMSE rounded to two decimal places.
*/

-- Schema Setup
CREATE TABLE uber_request_logs(request_id int, request_date datetime, request_status varchar(10), distance_to_travel float, monetary_cost float, driver_to_client_distance float);

INSERT INTO uber_request_logs VALUES (1,'2020-01-09','success', 70.59, 6.56,14.36), (2,'2020-01-24','success', 93.36, 22.68,19.9), (3,'2020-02-08','fail', 51.24, 11.39,21.32), (4,'2020-02-23','success', 61.58,8.04,44.26), (5,'2020-03-09','success', 25.04,7.19,1.74), (6,'2020-03-24','fail', 45.57, 4.68,24.19), (7,'2020-04-08','success', 24.45,12.69,15.91), (8,'2020-04-23','success', 48.22,11.2,48.82), (9,'2020-05-08','success', 56.63,4.04,16.08), (10,'2020-05-23','fail', 19.03,16.65,11.22), (11,'2020-06-07','fail', 81,6.56,26.6), (12,'2020-06-22','fail', 21.32,8.86,28.57), (13,'2020-07-07','fail', 14.74,17.76,19.33), (14,'2020-07-22','success',66.73,13.68,14.07), (15,'2020-08-06','success',32.98,16.17,25.34), (16,'2020-08-21','success',46.49,1.84,41.9), (17,'2020-09-05','fail', 45.98,12.2,2.46), (18,'2020-09-20','success',3.14,24.8,36.6), (19,'2020-10-05','success',75.33,23.04,29.99), (20,'2020-10-20','success', 53.76,22.94,18.74);

-- Solution
WITH aggregates AS
(
 SELECT
    DATE_FORMAT(request_date, '%Y-%m') AS yearmonth,
    SUM(distance_to_travel) AS total_distance,
    SUM(monetary_cost) AS total_cost
 FROM uber_request_logs
 GROUP BY yearmonth
)
,calculations AS
(
 SELECT
    yearmonth,
    total_distance/total_cost AS actual_value,
    LAG(total_distance/total_cost) OVER(ORDER BY yearmonth) AS forecast_value
 FROM aggregates
)
    
SELECT
    ROUND(SQRT(AVG(POWER(actual_value - forecast_value,2))),2) AS rmse
FROM calculations
WHERE forecast_value IS NOT NULL;

Embed on website

To embed this program on your website, copy the following code and paste it into your website's HTML: