/*
Google SQL Interview Question
Calculate the average session distance traveled by Google Fit users using GPS data for two scenarios:
Considering Earth's curvature (Haversine formula).
Assuming a flat surface.
For each session, use the distance between the highest and lowest step IDs, and ignore sessions with only one step. Calculate and output the average distance for both scenarios and the difference between them.
Formulas:
1. Curved Earth: d=6371×arccos(sin(ϕ1)×sin(ϕ2)+cos(ϕ1)×cos(ϕ2)×cos(λ2−λ1))
2. Flat Surface: d=111×(lat2−lat1)2+(lon2−lon1)2
*/
-- Schema Setup
CREATE TABLE google_fit_location (user_id VARCHAR(50),session_id INT,step_id INT,day INT,latitude FLOAT,longitude FLOAT,altitude FLOAT);
INSERT INTO google_fit_location (user_id, session_id, step_id, day, latitude, longitude, altitude)VALUES('user_1', 101, 1, 1, 37.7749, -122.4194, 15.0),('user_1', 101, 2, 1, 37.7750, -122.4195, 15.5),('user_1', 101, 3, 1, 37.7751, -122.4196, 16.0),('user_1', 102, 1, 1, 34.0522, -118.2437, 20.0),('user_1', 102, 2, 1, 34.0523, -118.2438, 20.5),('user_2', 201, 1, 1, 40.7128, -74.0060, 5.0),('user_2', 201, 2, 1, 40.7129, -74.0061, 5.5),('user_2', 202, 1, 1, 51.5074, -0.1278, 10.0),('user_2', 202, 2, 1, 51.5075, -0.1279, 10.5),('user_3', 301, 1, 1, 48.8566, 2.3522, 25.0),('user_3', 301, 2, 1, 48.8567, 2.3523, 25.5);
-- Solution
WITH SessionMinMax AS (
SELECT
user_id,
session_id,
day,
MIN(step_id) AS start_step_id,
MAX(step_id) AS end_step_id,
MIN(latitude) AS start_lat,
MIN(longitude) AS start_lon,
MAX(latitude) AS end_lat,
MAX(longitude) AS end_lon
FROM (
SELECT
user_id,
session_id,
day,
step_id,
latitude,
longitude,
ROW_NUMBER() OVER (PARTITION BY user_id, session_id, day ORDER BY step_id ASC) AS row_num_asc,
ROW_NUMBER() OVER (PARTITION BY user_id, session_id, day ORDER BY step_id DESC) AS row_num_desc
FROM google_fit_location
) t
WHERE row_num_asc = 1 OR row_num_desc = 1
GROUP BY user_id, session_id, day
),
Distances AS (
SELECT
user_id,
session_id,
day,
6371 * ACOS(
SIN(RADIANS(start_lat)) * SIN(RADIANS(end_lat)) +
COS(RADIANS(start_lat)) * COS(RADIANS(end_lat)) * COS(RADIANS(end_lon - start_lon))
) AS distance_curved,
SQRT(POWER(end_lat - start_lat, 2) + POWER(end_lon - start_lon, 2)) * 111 AS distance_flat
FROM SessionMinMax
)
SELECT
AVG(distance_curved) AS avg_distance_curved,
AVG(distance_flat) AS avg_distance_flat,
AVG(distance_curved) - AVG(distance_flat) AS difference
FROM Distances;
To embed this program on your website, copy the following code and paste it into your website's HTML: