Friday, 28 November 2008

Your very own CSV file parser

So I recently found that apparently there's no support for the Microsoft Text Driver on 64 bit machines. In a nutshell, that meant all my applications which relied on it to parse CSV files and return them as a DataTable no longer worked. So up went the sleeves and out came this code snippet to manually do that parsing. This code makes the assumption that the CSV file is produced by Excel, the first row is a header row and there are no "s to encapsulate cells in the header row (all assumptions I could make in the process of writing this because I needed a quick fix). Hopefully this is easy enough to understand and modify for your own needs, but feel free to ask questions!



            
            
string COMMA = "INCCOMMAREPLACEMENT57";
            
string SEPA = "INCSEPARATOR57";
            
string[] splitstring = new string[] { SEPA };

            
DataTable t = new DataTable();
            
string[] lines = File.ReadAllLines(csvDirectory + "\\" + csvFileName);//read in file
          
            //first line is header
            
string[] headers = lines[0].Split(',');
          
          
//for each subsequent row
            //for row 0: add them as column headers - we know line 0 has no ""            
            
foreach (string s in headers)
            
{
                t.Columns.Add
(s, typeof(string));
            
}

            
//foreach line
            
for (int i = 1; i < lines.Length; i++)
            
{                
                
int first = 9999;
                
int second = 9999;
                
string s = lines[i];

                
//continue doing this until no more "" in the line
                
while (first > -1)
                
{
                    
//search for "
                    
first = s.IndexOf('"');

                    
if (first > 0)
                    
{
                        
//search for closing "
                        
second = s.IndexOf('"', first + 1);
                        
string subS = s.Substring(first, second - first + 1);

                        
//between the two "" find all , and replace with INCCOMMAREPLACEMENT57
                        
string subS2 = subS.Replace(",", COMMA);

                        
//remove the two ""
                        
subS2 = subS2.Replace("\"", "");

                        s = s.Replace(subS, subS2);
                    }
                    else
                    {
                        break;
                    }
                }
                //replace all , with INCSEPARATOR57
                s = s.Replace("
,", SEPA);
                
                //replace INCCOMMAREPLACEMENT57 with ,
                s = s.Replace(COMMA, "
,");

                //split the line using INCSEPARATOR57
                string[] data = s.Split(splitstring, StringSplitOptions.None);
                
                DataRow r = t.NewRow();

                for (int j = 0; j < data.Length; j++)
                {
                    r[j] = data[j];                    
                }
                t.Rows.Add(r);
            }
return t;

No comments: