.NET

How to read Excel files without having Excel installed on your machine using .NET ?

Posted on Updated on

Luckily, there is a way to read from Excel files that has (*.xlsx) or (*.xls) extensions, without having Office Excel installed on your machine.

This way can be used with .NET Windows, or Webs, also can be used with PowerShell, however, I will address below how to use it with .NET only in details, and will give some guidance how to use it with PowerShell

1. Open your visual studio, and Create a new .NET Project (web app, or windows app, or other) with .NET v4.0 or more
2. Create a new Class, and lets name it “ExcelAdapter.cs”
3. Paste the blow block in “ExcelAdapter.cs”

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.Data;
using System.Globalization;

namespace ExcelReader
{
    public class ExcelAdapter
    {
        public static DataTable ReadExcel(string path, string sheetName)
        {
            var data = new DataTable();
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(path, false))
            {
                // Get the worksheet we are working with
                var sheets = spreadsheetDocument.WorkbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName);
                var worksheetPart = (WorksheetPart)spreadsheetDocument.WorkbookPart.GetPartById(sheets.First().Id);
                var worksheet = worksheetPart.Worksheet;
                var sstPart = spreadsheetDocument.WorkbookPart.GetPartsOfType<SharedStringTablePart>().First();
                var ssTable = sstPart.SharedStringTable;
                // Get the CellFormats for cells without defined data types
                var workbookStylesPart = spreadsheetDocument.WorkbookPart.GetPartsOfType<WorkbookStylesPart>().First();
                var cellFormats = workbookStylesPart.Stylesheet.CellFormats;

                ExtractRowsData(data, worksheet, ssTable, cellFormats);
            }
            return data;
        }
        private static void ExtractRowsData(DataTable data, Worksheet worksheet, SharedStringTable ssTable, CellFormats cellFormats)
        {
            var columnHeaders = worksheet.Descendants<Row>().First().Descendants<Cell>().Select(c => Convert.ToString(ProcessCellValue(c, ssTable, cellFormats))).ToArray();
            var columnHeadersCellReference = worksheet.Descendants<Row>().First().Descendants<Cell>().Select(c => c.CellReference.InnerText.Replace("1", string.Empty)).ToArray();
            var spreadsheetData = from row in worksheet.Descendants<Row>()
                                  where row.RowIndex > 1
                                  select row;
            foreach (string columnHeader in columnHeaders)
            {
                data.Columns.Add(columnHeader);
            }
            foreach (var dataRow in spreadsheetData)
            {
                var newRow = data.NewRow();
                for (int i = 0; i < columnHeaders.Length; i++)
                {
                    // Find and add the correct cell to the row object
                    var cell = dataRow.Descendants<Cell>().Where(c => c.CellReference == columnHeadersCellReference[i] + dataRow.RowIndex).FirstOrDefault();
                    if (cell != null)
                        newRow[columnHeaders[i]] = ProcessCellValue(cell, ssTable, cellFormats);
                }
                if (!newRow.ItemArray.All(field =>
                {
                    string s = null;
                    if (field != null)
                        s = field.ToString();
                    return string.IsNullOrEmpty(s);
                }))
                    data.Rows.Add(newRow);
            }
        }
        // Process the valus of a cell and return a .NET value
        private static Func<Cell, SharedStringTable, CellFormats, Object> ProcessCellValue =
            (c, ssTable, cellFormats) =>
            {
                if (c.CellValue == null) return null;
                // If there is no data type, this must be a string that has been formatted as a number
                if (c.DataType == null)
                {
                    if (c.StyleIndex == null) return c.CellValue.Text.Trim();
                    var cf =
                        cellFormats.Descendants<CellFormat>()
                                   .ElementAt<CellFormat>(Convert.ToInt32(c.StyleIndex.Value));
                    if (cf.NumberFormatId >= 0 && cf.NumberFormatId <= 13) // This is a number
                        return double.Parse(c.CellValue.Text, NumberStyles.Any);
                    if (cf.NumberFormatId >= 14 && cf.NumberFormatId <= 22) // This is a date
                        return DateTime.FromOADate(Convert.ToDouble(c.CellValue.Text));
                    return c.CellValue.Text.Trim();
                }
                switch (c.DataType.Value)
                {
                    case CellValues.SharedString:
                        return ssTable.ChildElements[Convert.ToInt32(c.CellValue.Text)].InnerText.Trim();
                    case CellValues.Boolean:
                        return c.CellValue.Text == "1";
                    case CellValues.Date:
                        return DateTime.FromOADate(Convert.ToDouble(c.CellValue.Text));
                    case CellValues.Number:
                        return double.Parse(c.CellValue.Text, NumberStyles.Any);
                    default:
                        return c.CellValue != null ? c.CellValue.Text : string.Empty;
                }
            };
        public static List<string> GetSheet(string filename)
        {
            DataTable dataTable = new DataTable();
            List<string> list = new List<string>();
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(filename, false))
            {
                IEnumerable<Sheet> enumerable = spreadsheetDocument.WorkbookPart.Workbook.Descendants<Sheet>();
                foreach (Sheet current in enumerable)
                {
                    list.Add(current.Name);
                }
            }
            return list;
        }
    }
}

4. This is an open source library, To install it you need to open “Package Manage Console” in visual studio and then run this command : Install-Package DocumentFormat.OpenXml -Version 2.9.1

PackageManagerConsole2.png

5. If you cannot see the Package manage console, please show it from Tools > NuGet Package Manager > Package Manager Console

PackageManagerConsole.png

6. If you prefer to do it manually, you can search google on DocumentFormat.OpenXml, download the .dll, and use add to reference

7. Now the Document Format should be installed in your project, check the references to confirm

8. Rebuild the project, should be no errors

9. To start using the functions create a new form, or class, and start using the functions, this function will return list of sheets in the excel file

List<string> sheets = ExcelAdapter.GetSheet(@"C:\temp\file.xlsx");

10. Now we will get the data in one of the sheets, and load it to a datatable, then to DataGrid, to do this write the below

DataTable dt = ExcelAdapter.ReadExcel(@"C:\temp\file.xlsx", "Sheet1");//please be aware that Sheet1 is case sensitive!
 this.dataGridView1.DataSource = dt;
 //Please don't forget to add databind if you are using with asp.net
Grid.png

12. and you will have your Dataload as below

If you want to use it with PowerShell, it will be a little bit more complicated, you will need first to, convert this module to *.dll then reference it to your PowerShell script like in this link
https://activedirectoryfaq.com/2016/01/use-net-code-c-and-dlls-in-powershell/

Advertisements