I am really getting clueless here. I import ~2million rows into my azure sql database. I create temp table where I put my values, when I use merge technique to insert only rows that do not have duplicates. My code and scripts are below.
What I have tried:
public async Task BulkImportWithoutDuplicates(DataTable reader)
{
var tableName = "##tempImport";
using (var connection = new SqlConnection(sqlCOnn.ConnectionString))
{
using (SqlCommand command = new SqlCommand("", sqlCOnn))
{
try
{
sqlCOnn.Open();
command.CommandText = Scripts.GetTempTableScript();
command.ExecuteNonQuery();
using (SqlBulkCopy b = new SqlBulkCopy(conString, SqlBulkCopyOptions.TableLock))
{
b.BulkCopyTimeout = 0;
b.BatchSize = reader.Rows.Count;
b.DestinationTableName = tableName;
await b.WriteToServerAsync(reader);
b.Close();
}
command.CommandText = Scripts.GetMergeScript();
var rows = command.ExecuteNonQuery();
}
catch (Exception ex)
{
}
finally
{
connection.Close();
}
}
}
}
public static string GetTempTableScript()
{
return $@"
IF OBJECT_ID('tempdb.dbo.##tempImport', 'U') IS NOT NULL
BEGIN
DROP TABLE ##tempImport;
END
CREATE TABLE ##tempImport ( ... all the columns);";
}
public static string GetMergeScript()
{
return $@"MERGE
INTO dbo.Data AS target
USING ##tempImport AS source
ON (source.TransactionId = target.TransactionId AND source.UserId = target.UserId)
WHEN NOT MATCHED THEN
INSERT (Start, Spend, UserId, Product, Shop, ClientId, UploadDataId, UniqueId, TransactionId, q, cq, c2)
VALUES (source.Start, source.Spend, source.UserId, source.Product, source.Shop,
source.ClientId, source.UploadDataId, source.UniqueId, source.TransactionId, source.q, source.c1, source.c2);
";
}
I really do not get why it takes ages until it finishes. I waited for 24minutes until it was added to temporary table alone. I was reading this article and it seems that it shouldn't take long. https://www.adathedev.co.uk/2011/01/sqlbulkcopy-to-sql-server-in-parallel.html?m=1
What I am doing wrong here? How Can I improve the speed?