Here's an alternative to Solution 1 which I believe is easier to follow. I created the test data as this:
create table #test
(id int identity(1,1),phone nvarchar(123),Email nvarchar(123),Person int)
insert into #test (phone, Email) values
('111', 'abc@gmail.com'),
('112', 'abc@gmail.com'),
('112', 'pqr@hotmail.com'),
('113', 'abc@gmail.com'),
('211', 'someoneelse@live.com')
First use a SQL Window function to give us proposed "Person" numbers based only on the email address:
UPDATE T SET Person = D.RN FROM #test T
INNER JOIN
(SELECT id, DENSE_RANK() OVER (ORDER BY Email) AS RN FROM #test) D on D.id = T.ID
This is the contents of #test after that:
id Phone Email Person
1 111 abc@gmail.com 1
2 112 abc@gmail.com 1
3 112 pqr@hotmail.com 2
4 113 abc@gmail.com 1
5 211 someoneelse@live.com 3
This next statement corrects the situation for the Phone number ... i.e. it will replace Person = 2 with Person = 1 because they are the same "person". I've use
MIN
to get the earlier
Person
number but you could just as easily use
MAX
:
UPDATE T SET Person = newPerson
from (
SELECT PHONE, min(person) as newPerson FROM #test
GROUP BY phone having count(*) > 1) AS q
inner join #test t on q.phone=t.phone
The contents of #test after that are:
id Phone Email Person
1 111 abc@gmail.com 1
2 112 abc@gmail.com 1
3 112 pqr@hotmail.com 1
4 113 abc@gmail.com 1
5 211 someoneelse@live.com 3
You could just stop there as each person has a unique number (1 and 3) but if you really want the numbers to be consecutive then run this last piece. It renumbers each distinct
Person
using
RANK
but you could just as easily use
ROW_NUMBER()
UPDATE T SET Person = Q2.NP
FROM #test T
INNER JOIN (select Person, RANK() OVER (ORDER BY Person) AS NP FROM (select distinct Person from #test) Q1) Q2 ON Q2.Person=T.Person
The final results are
id Phone Email Person
1 111 abc@gmail.com 1
2 112 abc@gmail.com 1
3 112 pqr@hotmail.com 1
4 113 abc@gmail.com 1
5 211 someoneelse@live.com 2
Here is the complete code done as one-liners to make it look shorter ;-)
create table #test (id int identity(1,1),phone nvarchar(123),Email nvarchar(123),Person int)
insert into #test (phone, Email) values
('111', 'abc@gmail.com'),
('112', 'abc@gmail.com'),
('112', 'pqr@hotmail.com'),
('113', 'abc@gmail.com'),
('211', 'someoneelse@live.com')
UPDATE T SET Person = D.RN FROM #test T INNER JOIN ( SELECT id, DENSE_RANK() OVER (ORDER BY Email) AS RN FROM #test) D on D.id = T.ID
UPDATE T SET Person = newPerson from (SELECT PHONE, min(person) as newPerson FROM #test GROUP BY phone having count(*) > 1) AS q inner join #test t on q.phone=t.phone
UPDATE T SET Person = Q2.NP FROM #test T INNER JOIN (select Person, ROW_NUMBER() OVER (ORDER BY Person) AS NP FROM (select distinct Person from #test) Q1) Q2 ON Q2.Person=T.Person
select * from #TEST
DROP TABLE #test