A problem that I encounter very often in practice is finding duplicates from a table and eliminate them.
Since I found a nice way to eliminate them, I thought that I could share.
Since I found a nice way to eliminate them, I thought that I could share.
-- create table
create table duplicates_example
(
duplicates_example_id numeric(10) not null primary key,
firstname varchar(255),
lastname varchar(255),
phone varchar(255),
email varchar(255)
);
-- create sequence
CREATE SEQUENCE duplicates_example_seq START 1000000 OWNED BY duplicates_example.duplicates_example_id;
Now, let's populate the table.
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName1', 'LastName1', '12345', 'test@tes.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName1', 'LastName1', '12345', 'test@tes.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName1', 'LastName2', '12345', 'test@tes.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName2', 'LastName2', '123457', 'test@tes.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName2', 'LastName2', '123457', 'test@tes.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName2', 'LastName2', '123457', 'test@tes.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName3', 'LastName3', '523457', 'test3@tes3.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName3', 'LastName3', '523457', 'test3@tes3.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName3', 'LastName3', '523457', 'test3@tes3.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName3', 'LastName3', '523457', 'test3@tes3.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName3', 'LastName4', '523457', 'test3@tes3.com';
insert into duplicates_example select distinct nextval('duplicates_example_seq'), 'FirstName3', 'LastName4', '523457', 'test3@tes3.com';
Query the table:
select * from duplicates_example order by lastname;
We get:
Now, let's write a query for finding the duplicates.
(
SELECT duplicates_example_id,firstname, lastname, phone, email,
ROW_NUMBER() OVER(PARTITION BY firstname, lastname, phone, email
ORDER BY lastname ) AS Row
FROM duplicates_example
) dups
WHERE dups.Row > 1;
So, now we have the duplicates and we can delete them. We shall store the duplicates in a table.
CREATE TABLE dups AS
SELECT * FROM
(
SELECT duplicates_example_id,firstname, lastname, phone, email,
ROW_NUMBER() OVER(PARTITION BY firstname, lastname, phone, email
ORDER BY lastname ) AS Row
FROM duplicates_example
) dups
WHERE dups.Row > 1;
Now we shall delete the duplicates:
delete from duplicates_example
where exists (select 1 from dups d where d.duplicates_example_id = duplicates_example.duplicates_example_id);
Let's query the table again:
select * from duplicates_example;
And see the results: