/*
Google SQL Interview Question
We want to identify the most suspicious claims in each state.
We'll consider the top 5 percentile of claims with the highest fraud scores in each state as potentially fraudulent.
*/
-- Schema Setup
CREATE TABLE claims (policy_number VARCHAR(50), state VARCHAR(50), claim_cost FLOAT, fraud_score FLOAT);
INSERT INTO claims (policy_number, state, claim_cost, fraud_score) VALUES ('POL123', 'CA', 10000.00, 85.5), ('POL124', 'CA', 5000.00, 70.2), ('POL125', 'CA', 20000.00, 92.8), ('POL126', 'NY', 15000.00, 88.1), ('POL127', 'NY', 8000.00, 65.4), ('POL128', 'NY', 25000.00, 93.7), ('POL129', 'TX', 12000.00, 75.3), ('POL130', 'TX', 18000.00, 95.2), ('POL131', 'TX', 9000.00, 60.0), ('POL132', 'FL', 11000.00, 82.0), ('POL133', 'FL', 14000.00, 87.5), ('POL134', 'FL', 30000.00, 99.0);
-- Solution
WITH ranked_percentiles AS
(
SELECT
policy_number,
state,
claim_cost,
fraud_score,
PERCENT_RANK() OVER(PARTITION BY state ORDER BY fraud_score DESC) AS percentile
FROM claims
)
SELECT
policy_number,
state,
claim_cost,
fraud_score
FROM ranked_percentiles
WHERE percentile >= 0.95;
To embed this program on your website, copy the following code and paste it into your website's HTML: