Delete Duplicate Data (SQL)

Fork
----- Scenario 1: Data duplicated based on SOME of the columns

-- Requirement: Delete duplicate data from cars table. Duplicate record is identified based on the model and brand name.

create table if not exists cars
(
    id      int,
    model   varchar(50),
    brand   varchar(40),
    color   varchar(30),
    make    int
);
insert into cars values (1, 'Model S', 'Tesla', 'Blue', 2018);
insert into cars values (2, 'EQS', 'Mercedes-Benz', 'Black', 2022);
insert into cars values (3, 'iX', 'BMW', 'Red', 2022);
insert into cars values (4, 'Ioniq 5', 'Hyundai', 'White', 2021);
insert into cars values (5, 'Model S', 'Tesla', 'Silver', 2018);
insert into cars values (6, 'Ioniq 5', 'Hyundai', 'Green', 2021);

/*Select * from cars*/
/*Order by Model, brand*/

-->> SOLUTION 1: Delete using Unique identifier
/*Delete from cars
where id in(
Select max(id)
from cars
group by model, brand
having count(*)>1)*/



-->> SOLUTION 2: Using SELF join
/*Delete from cars
where id in(
Select c2.id from cars c1
join cars c2
on c1.model = c2.model and c1.brand = c2.brand
and c1.id<c2.id)*/

-->> SOLUTION 3: Using Window function
/*delete from cars
where id in (
Select id from(
Select *,
row_number() over(partition by model, brand) as row_num
from cars)x
where x.row_num>1)*/

-->> SOLUTION 4: Using MIN function. This delete even multiple duplicate records.
/*delete from cars
where id not in(
Select MIN(id) as id
from cars
group by model, brand)*/

-->> SOLUTION 5: Using backup table.

/*create table cars_bkps
as
select * from cars where 1=2;

insert into cars_bkps
Select * from cars
where id in (
Select min(id)
from cars*/
group by model, brand)

/*drop table cars

alter table cars_bkps rename to cars*/*/




-->> SOLUTION 6: Using backup table without dropping the original table.
/*
create table cars_backup
as
Select * from cars where 1=2

Insert into cars_backup
Select * from cars
where id in (
Select max(id)
from cars
group by model, brand
having count(*)>1)

truncate table cars

insert into cars
Select * from cars_backup

drop table cars_backup*/


----Scenario 2: Data duplicated based on ALL of the columns
-- Requirement: Delete duplicate entry for a car in the CARS table.

/*drop table if exists cars;
create table if not exists cars
(
    id      int,
    model   varchar(50),
    brand   varchar(40),
    color   varchar(30),
    make    int
);
insert into cars values (1, 'Model S', 'Tesla', 'Blue', 2018);
insert into cars values (2, 'EQS', 'Mercedes-Benz', 'Black', 2022);
insert into cars values (3, 'iX', 'BMW', 'Red', 2022);
insert into cars values (4, 'Ioniq 5', 'Hyundai', 'White', 2021);
insert into cars values (1, 'Model S', 'Tesla', 'Blue', 2018);
insert into cars values (4, 'Ioniq 5', 'Hyundai', 'White', 2021);
*/

/*select * from cars;*/


-->> SOLUTION 1: Delete using CTID / ROWID (in Oracle)

/*Delete cars
where ctid in (
select max(CTID)
from cars
group by model, brand
having count(*)>1)*/

-->> SOLUTION 2: By creating a temporary unique id column

/*alter table cars add column row_num int generated always as identity

Delete from cars
where id in(
select max(row_num)
from cars
group by model, brand
having count(*)>1)*/



-->> SOLUTION 3: By creating a backup table.
/*create table cars_bks
as
Select distinct * from cars*/
/*
drop table cars

alter table cars_backup rename to cars*/

-->> SOLUTION 4: By creating a backup table without dropping the original table.

/*create table cars_bks1
as
Select distinct * from cars

truncate table cars

insert into cars
Select * from cars_bks1

drop table cars_bks1*/
Fork
Embed on website