Home > Code Samples > Bulk Insert XML File in C#

Bulk Insert XML File



Download LoadDiggRSS.cs (Right Click->Save Target As...).

The code below shows how to use the XML functionality of the DataStreams framework to bulk insert an XML file of any size into SQL Server. The code downloads the current popular topics from the programming channel on Digg and inserts the records into a table in SQL Server. The code also makes use of the ValidatingDataReader from Creating a Validating DataReader for SqlBulkCopy.

C#
using System;
using System.Data.SqlClient;
using System.IO;
using System.Net;
using System.Text;

using DataStreams.ETL;
using DataStreams.Xml;

namespace LargeXMLReader
{
	public class LoadDiggRSS
	{
		static void Main(string[] args)
		{
			using (SqlConnection dbConn = new SqlConnection(@"Data Source=.;Initial Catalog=Samples;Integrated Security=True"))
			{
				dbConn.Open();

				using (SqlTransaction tran = dbConn.BeginTransaction())
				{
					try
					{
						using (SqlCommand createTable = new SqlCommand(@"
							IF NOT EXISTS
							(
								SELECT
									*
								FROM
									sys.tables t
								WHERE
									t.[name] = 'DiggItem'
							)
							BEGIN
								CREATE TABLE dbo.DiggItem
								(
									digg_item_id INT IDENTITY(1,1) NOT NULL,
									title VARCHAR(MAX) NOT NULL,
									link VARCHAR(MAX) NOT NULL,
									description VARCHAR(MAX) NOT NULL,
									publish_date DATETIME NOT NULL,
									digg_count INT NOT NULL,
									category VARCHAR(MAX) NOT NULL,
									comment_count INT NOT NULL,
									CONSTRAINT PK_DiggItem PRIMARY KEY CLUSTERED
									(
										digg_item_id
									)
								);
							END
							",
							dbConn,
							tran))
						{
							createTable.ExecuteNonQuery();
						}

						HttpWebRequest request = HttpWebRequest.Create(
							"http://feeds.digg.com/digg/topic/programming/popular.rss") as HttpWebRequest;

						using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
						using (Stream responseStream = response.GetResponseStream())
						using (XmlDataReader reader = new XmlDataReader(
							responseStream,
							Encoding.UTF8,
							"atom:feed/atom:entry",
							LoadMethod.Streaming))
						using (SqlBulkCopy bcp = new SqlBulkCopy(
							dbConn,
							SqlBulkCopyOptions.Default,
							tran))
						{
							reader.AddNamespace("digg", "http://digg.com/docs/diggrss/");
							reader.AddNamespace("media", "http://search.yahoo.com/mrss/");
							reader.AddNamespace("feedburner", "http://rssnamespace.org/feedburner/ext/1.0");
							reader.AddNamespace("atom", "http://www.w3.org/2005/Atom");

							reader.Columns.Add("VARCHAR", "atom:title", "title");
							reader.Columns.Add("VARCHAR", "atom:link/@href", "link");
							reader.Columns.Add("VARCHAR", "atom:content", "description");
							reader.Columns.Add("DATETIME", "atom:updated", "pubDate");
							reader.Columns.Add("INT", "digg:diggCount", "diggCount");
							reader.Columns.Add("VARCHAR", "digg:category", "category");
							reader.Columns.Add("INT", "digg:commentCount", "commentCount");

							bcp.DestinationTableName = "dbo.DiggItem";
							bcp.BatchSize = 1000;
							bcp.BulkCopyTimeout = 300;

							bcp.ColumnMappings.Add("title", "title");
							bcp.ColumnMappings.Add("link", "link");
							bcp.ColumnMappings.Add("description", "description");
							bcp.ColumnMappings.Add("pubDate", "publish_date");
							bcp.ColumnMappings.Add("diggCount", "digg_count");
							bcp.ColumnMappings.Add("category", "category");
							bcp.ColumnMappings.Add("commentCount", "comment_count");

							using (ValidatingDataReader validator = new ValidatingDataReader(
								reader,
								dbConn,
								bcp,
								tran))
							{
								bcp.WriteToServer(reader);
							}
						}

						tran.Commit();
					}
					catch
					{
						try
						{
							tran.Rollback();
						}
						catch
						{

						}

						throw;
					}
				}
			}

			Console.WriteLine("Done.");
			Console.ReadLine();
		}
	}
}