using System; using System.Data.SqlClient; using System.IO; using System.Net; using System.Text; using DataStreams.ETL; using DataStreams.Xml; namespace LargeXMLReader { public class LoadDiggRSS { static void Main(string[] args) { // // // Stories // Stories // 2011-10-05T11:30:29Z // http://services.digg.com/ // // Digg // // // How Magazines Are Adapting to the Mobile Revolution // // Magazine editors discuss early lessons from tablet publishing, and the challenges of extending their content to a proliferating number of tablet sizes and platforms going forward.<img src="http://feeds.feedburner.com/~r/digg/topic/programming/popular/~4/1evIqO7R2kU" height="1" width="1"/> // 2011-10-04T22:14:11Z // // 131 // // // Technology // // // 1 // // // // // // // // // http://digg.com/news/technology/how_magazines_are_adapting_to_the_mobile_revolution // http://digg.com/news/technology/how_magazines_are_adapting_to_the_mobile_revolution?utm_campaign=Feed%3A+http%3A%2F%2Fservices.digg.com%2F&utm_medium=feed&utm_source=diggapi // ... // using (SqlConnection dbConn = new SqlConnection(@"Data Source=.;Initial Catalog=Samples;Integrated Security=True")) { dbConn.Open(); using (SqlTransaction tran = dbConn.BeginTransaction()) { try { using (SqlCommand createTable = new SqlCommand(@" IF NOT EXISTS ( SELECT * FROM sys.tables t WHERE t.[name] = 'DiggItem' ) BEGIN CREATE TABLE dbo.DiggItem ( digg_item_id INT IDENTITY(1,1) NOT NULL, title VARCHAR(MAX) NOT NULL, link VARCHAR(MAX) NOT NULL, description VARCHAR(MAX) NOT NULL, publish_date DATETIME NOT NULL, digg_count INT NOT NULL, category VARCHAR(MAX) NOT NULL, comment_count INT NOT NULL, CONSTRAINT PK_DiggItem PRIMARY KEY CLUSTERED ( digg_item_id ) ); END ", dbConn, tran)) { createTable.ExecuteNonQuery(); } HttpWebRequest request = HttpWebRequest.Create( "http://feeds.digg.com/digg/topic/programming/popular.rss") as HttpWebRequest; using (HttpWebResponse response = request.GetResponse() as HttpWebResponse) using (Stream responseStream = response.GetResponseStream()) using (XmlDataReader reader = new XmlDataReader( responseStream, Encoding.UTF8, "atom:feed/atom:entry", LoadMethod.Streaming)) using (SqlBulkCopy bcp = new SqlBulkCopy( dbConn, SqlBulkCopyOptions.Default, tran)) { reader.AddNamespace("digg", "http://digg.com/docs/diggrss/"); reader.AddNamespace("media", "http://search.yahoo.com/mrss/"); reader.AddNamespace("feedburner", "http://rssnamespace.org/feedburner/ext/1.0"); reader.AddNamespace("atom", "http://www.w3.org/2005/Atom"); reader.Columns.Add("VARCHAR", "atom:title", "title"); reader.Columns.Add("VARCHAR", "atom:link/@href", "link"); reader.Columns.Add("VARCHAR", "atom:content", "description"); reader.Columns.Add("DATETIME", "atom:updated", "pubDate"); reader.Columns.Add("INT", "digg:diggCount", "diggCount"); reader.Columns.Add("VARCHAR", "digg:category", "category"); reader.Columns.Add("INT", "digg:commentCount", "commentCount"); bcp.DestinationTableName = "dbo.DiggItem"; bcp.BatchSize = 1000; bcp.BulkCopyTimeout = 300; bcp.ColumnMappings.Add("title", "title"); bcp.ColumnMappings.Add("link", "link"); bcp.ColumnMappings.Add("description", "description"); bcp.ColumnMappings.Add("pubDate", "publish_date"); bcp.ColumnMappings.Add("diggCount", "digg_count"); bcp.ColumnMappings.Add("category", "category"); bcp.ColumnMappings.Add("commentCount", "comment_count"); using (ValidatingDataReader validator = new ValidatingDataReader( reader, dbConn, bcp, tran)) { bcp.WriteToServer(reader); } } tran.Commit(); } catch { try { tran.Rollback(); } catch { } throw; } } } Console.WriteLine("Done."); Console.ReadLine(); } } }