Introduction
There are two ways to change cell values in an Excel spreadsheet: the wrong way and the fast way. The wrong way takes the form of a loop where each cell is individually accessed in a for
loop. Each time you assign a value, your add-in makes a COM call into Excel. Calling COM object methods from C# or anything for that matter, is a slow process. It might be 10ms or so (just a guess) but if you were looping through 200k cells, that becomes 2000 seconds of COM calls.
Exhibit 1: The slow way of doing things in Excel
int i = 1;
foreach(Excel.Range cell in cells)
{
cell.Value2 = foo(i);
}
The fast way is to work with arrays and assign values to many cells at once in one COM call. Instead of assigning a value to each cell in a loop, we can assign values to an array, then assign the value of the entire range to the array. In exhibit 2, we create a two dimensional array buffer[1 .. 200000, 1 .. 1], then calculate the values in the for
loop. The reason this is faster is that the for
loop is not calling Excel, it’s all .NET and once we’ve calculated all the values, we call Excel once to set the values of the cells.
Exhibit 2: The fast way
int i = 1;
int bufferSize = cells.Rows.Count;int[] lowerBounds = new int[] {1,1};
int[] lengths = new int[] {bufferSize, 1};
object[,] buffer = Array.CreateInstance(typeof(object),
lengths, lowerBounds) as object[,];
for(i = 1; i <= bufferSize; i++)
{
buffer[i,1] = foo(i);
}
cells.Value2 = buffer;
The Buffer Class
Now the problem with this is that the buffer is potentially using a lot of memory (imagine if it were text values). Instead of reducing it to one call, we can compromise and use a buffer so instead of doing one big write, we do 10 instead for instance. To make things easier, we can wrap up the logic in a utility class shown in exhibit 3.
Exhibit 3: The ExcelColumnBuffer class
using System;
using System.Collections.Generic;
using System.Text;
using Excel = Microsoft.Office.Interop.Excel;
using System.Runtime.InteropServices;
namespace xlspot.Tools
{
public enum ExcelBufferMode
{
Write = 1,
Overwrite = 2
}
public class ExcelColumnBuffer
{
#region Private Members
private int top = 0;
private int left = 0;
private int bufferSize = 0;
private object[,] buffer = null;
private ExcelBufferMode mode = ExcelBufferMode.Overwrite;
private Excel._Worksheet sheet = null;
#endregion
#region Constructors
public ExcelColumnBuffer(int nColumnSize,
ExcelBufferMode bufferMode = ExcelBufferMode.Write)
{
bufferSize = nColumnSize;
int[] lowerBounds = new int[] {1,1};
int[] lengths = new int[] {bufferSize,1};
buffer = Array.CreateInstance(typeof(object),
lengths, lowerBounds) as object[,];
mode = bufferMode;
}
#endregion
#region Public Methods
public void SetSheet(Excel._Worksheet sh)
{
sheet = sh;
}
public object Read(int y, int x)
{
int oy = 1;
Excel.Range cells = null;
List<object> comObjs = new List<object>();
if (x != left || !Translate(y, out oy))
{
Flush();
top = y;
left = x;
try
{
comObjs.Add(cells = sheet.Cells);
comObjs.Add(cells = cells[top, left] as Excel.Range);
comObjs.Add(cells = cells.Resize[bufferSize]);
buffer = cells.Value as object[,];
oy = 1;
}
finally
{
foreach (object o in comObjs)
{
if (o != null)
Marshal.ReleaseComObject(o);
}
}
}
return buffer[oy,1];
}
public void Write(int y, int x, object value)
{
int oy;
if (x != left || !Translate(y, out oy))
{
if (mode == ExcelBufferMode.Write)
{
Read(y, x);
}
else if (mode == ExcelBufferMode.Overwrite)
{
Flush();
top = y;
left = x;
}
Translate(y, out oy);
}
buffer[oy,1] = value;
}
public void Flush()
{
if (left == 0)
return;
Excel.Range cells = null;
List<object> comObjs = new List<object>();
try
{
comObjs.Add(cells = sheet.Cells);
comObjs.Add(cells = cells[top, left] as Excel.Range);
comObjs.Add(cells = cells.Resize[bufferSize]);
cells.Value = buffer;
}
finally
{
foreach (object o in comObjs)
{
if (o != null)
Marshal.ReleaseComObject(o);
}
}
if (mode == ExcelBufferMode.Overwrite)
{
Array.Clear(buffer, 1, bufferSize);
}
}
#endregion
#region Private Helper Methods
private bool Translate(int y, out int oy)
{
oy = y - top + 1;
if (oy >= 1 && oy <= bufferSize)
return true;
else
return false;
}
#endregion
#region Indexer
public object this[int y, int x]
{
get
{
return Read(y, x);
}
set
{
Write(y, x, value);
}
}
#endregion
}
}
The buffer class is column based so to use it effectively, access the spreadsheet top to bottom first, then sideways. See exhibit 4. The example in exhibit 5 uses the buffer class to determine if a range contains part of a Fibonacci sequence. I’m not a mathematician, so I’m sure there’s a better way to do it, but then again this is very fast.
Exhibit 4: Use this pattern to access cells through the buffer. If you can’t do that, then you need a different buffer class.
Exhibit 5: An example of using the ExcelColumnBuffer class to verify that the selected range contains part of a Fibonacci sequence.
private bool IsSelectionFibonacci()
{
Excel.Range selection = null, cells = null;
double d = 0.0, d1 = 0.0, d2 = 0.0, ds = 0.0;
double v;
int y, x, h, w, right, bottom;
List<object> comObjs = new List<object>();
try
{
comObjs.Add(selection = ExcelApp.Selection as Excel.Range);
if (selection != null)
{
ExcelColumnBuffer buffer = new ExcelColumnBuffer(100);
buffer.SetSheet(ExcelApp.ActiveSheet as Excel._Worksheet);
y = selection.Row;
x = selection.Column;
comObjs.Add(cells = selection.Rows);
h = cells.Count;
comObjs.Add(cells = selection.Columns);
w = cells.Count;
v = (double)buffer.Read(y, x);
right = x + w - 1;
bottom = y + h - 1;
ds = v;
if (v > 1)
{
d = 1.0;
d1 = 1.0;
d2 = 0.0;
while (d < v)
{
d = d1 + d2;
d2 = d1;
d1 = d;
}
if (d != v)
{
return false;
}
}
else
{
d1 = v;
}
++y;
for (; x <= right; x++)
{
for (; y <= bottom; y++)
{
v = (double)buffer[y, x];
d = d1 + d2;
if (v == d)
{
d2 = d1;
d1 = d;
}
else if (v == 1.0 && d1 == 0.0 && ds == 0.0)
{
d2 = d1;
d1 = v;
}
else if (v == 2.0 && d1 == 1.0 && ds == 1.0)
{
d2 = d1;
d1 = v;
}
else
{
return false;
}
}
y = selection.Row;
}
return true;
}
return false;
}
finally
{
foreach (object o in comObjs)
{
if (o != null)
Marshal.ReleaseComObject(o);
}
}
}
Conclusion
Although this buffer class is limited to column based access, it’s possible to extend it to use a row based buffer, or a two dimensional buffer. For applications that access many cell values, this makes a huge difference in performance.
CodeProject