/*
Google SQL Interview Question

Calculate the average session distance traveled by Google Fit users using GPS data for two scenarios:
 Considering Earth's curvature (Haversine formula).
 Assuming a flat surface.
For each session, use the distance between the highest and lowest step IDs, and ignore sessions with only one step. Calculate and output the average distance for both scenarios and the difference between them.

Formulas:
1. Curved Earth: d=6371×arccos(sin(ϕ1)×sin(ϕ2)+cos(ϕ1)×cos(ϕ2)×cos(λ2−λ1))
2. Flat Surface: d=111×(lat2−lat1)2+(lon2−lon1)2
*/

-- Schema Setup
CREATE TABLE google_fit_location (user_id VARCHAR(50),session_id INT,step_id INT,day INT,latitude FLOAT,longitude FLOAT,altitude FLOAT);

INSERT INTO google_fit_location (user_id, session_id, step_id, day, latitude, longitude, altitude)VALUES('user_1', 101, 1, 1, 37.7749, -122.4194, 15.0),('user_1', 101, 2, 1, 37.7750, -122.4195, 15.5),('user_1', 101, 3, 1, 37.7751, -122.4196, 16.0),('user_1', 102, 1, 1, 34.0522, -118.2437, 20.0),('user_1', 102, 2, 1, 34.0523, -118.2438, 20.5),('user_2', 201, 1, 1, 40.7128, -74.0060, 5.0),('user_2', 201, 2, 1, 40.7129, -74.0061, 5.5),('user_2', 202, 1, 1, 51.5074, -0.1278, 10.0),('user_2', 202, 2, 1, 51.5075, -0.1279, 10.5),('user_3', 301, 1, 1, 48.8566, 2.3522, 25.0),('user_3', 301, 2, 1, 48.8567, 2.3523, 25.5);

-- Solution
WITH SessionMinMax AS (
 SELECT
 user_id,
 session_id,
 day,
 MIN(step_id) AS start_step_id,
 MAX(step_id) AS end_step_id,
 MIN(latitude) AS start_lat,
 MIN(longitude) AS start_lon,
 MAX(latitude) AS end_lat,
 MAX(longitude) AS end_lon
 FROM (
 SELECT 
 user_id,
 session_id,
 day,
 step_id,
 latitude,
 longitude,
 ROW_NUMBER() OVER (PARTITION BY user_id, session_id, day ORDER BY step_id ASC) AS row_num_asc,
 ROW_NUMBER() OVER (PARTITION BY user_id, session_id, day ORDER BY step_id DESC) AS row_num_desc
 FROM google_fit_location
 ) t
 WHERE row_num_asc = 1 OR row_num_desc = 1
 GROUP BY user_id, session_id, day
),
Distances AS (
 SELECT 
 user_id,
 session_id,
 day,
 6371 * ACOS(
 SIN(RADIANS(start_lat)) * SIN(RADIANS(end_lat)) +
 COS(RADIANS(start_lat)) * COS(RADIANS(end_lat)) * COS(RADIANS(end_lon - start_lon))
 ) AS distance_curved,
 SQRT(POWER(end_lat - start_lat, 2) + POWER(end_lon - start_lon, 2)) * 111 AS distance_flat
 FROM SessionMinMax
)
SELECT
 AVG(distance_curved) AS avg_distance_curved,
 AVG(distance_flat) AS avg_distance_flat,
 AVG(distance_curved) - AVG(distance_flat) AS difference
FROM Distances;

Embed on website

To embed this program on your website, copy the following code and paste it into your website's HTML: