FileStreamer
Requirements:
- Be able to read all the lines in a given file.
- Be able to do so even if the file is HUGE ( == don’t load it all at once).
- Control the number of items I want to receive and whether or not the enumerator ignores empty lines. Always nice to have.
- Thread safety should be easy to support. Think about 50 threads, each reading the next line and processing it.
- Nice performance is a plus.
Playing with the API with my teammate Ron produced the following (the code was written in Notepad, so silly mistakes may keep it from compiling):
“Common foreach” usage:
foreach (string line in FileStreamer.GetLines(@"c:\temp\myfile.txt", true, 1000)) { /* .. code */ } // read 1000 items from the file while ignoring empty lines.
Reading from multiple threads usage:
using (FileStreamer streamer = new FileStreamer(@"c:\temp\myfile.txt", true, -1)) // -1 means no limit, read all non-empty lines
{
    Thread[] threads = new Thread[10];
    for(int i=0; i<threads.Length; i++)
    {
        threads[i] = new Thread((ThreadStart)delegate { 
            string line;
            if (!streamer.TryGetNextLine(out line)) // thread safe!
                return; // end of file, we can exit
            // do work …
        });  
        threads[i].Start();
    }  
    // join the threads + whatever …
}  
After reading a few ideas on Stack Overflow, I thought I'd share my solution:
// written by bogen (30/12/2008)
#region using
using System;
using System.Collections.Generic;
using System.IO;
#endregion
namespace Semingo.Common.Utils
{
    /// <summary>
    /// Streams the lines of a text file one at a time, without loading the whole file into memory.
    /// Instance members (<see cref="TryGetNextLine"/>, <see cref="TryGetNextLines"/> and
    /// <see cref="Dispose()"/>) are serialized on a private lock, so a single instance can be
    /// shared by several threads, each pulling the next line.
    /// Use the static <see cref="GetLines(string)"/> overloads for simple single-threaded
    /// <c>foreach</c> usage.
    /// </summary>
    /// <remarks>
    /// The file is opened lazily, when the first line is requested, and is closed either when the
    /// end of the file is reached or when the instance is disposed.
    /// </remarks>
    public class FileStreamer : IDisposable
    {
        #region fields
        /// <summary>Value of the <c>limit</c> argument that means "return every line".</summary>
        private const int NoLimit = -1;

        /// <summary>Buffer size, in bytes, handed to the underlying <see cref="FileStream"/>.</summary>
        private const int FileBufferSize = 1024;

        /// <summary>
        /// Upper bound for the initial capacity of a batch list, so that a very large
        /// <c>howMany</c> does not allocate a huge array up front. The list still grows as needed.
        /// </summary>
        private const int MaxPreallocatedBatch = 1024;

        private readonly object _locker = new object();
        private readonly string _path;
        private readonly bool _ignoreEmptyLines;
        private readonly int _limit;
        private readonly IEnumerator<string> _enumerator;
        private int _linesGiven; // only maintained when a limit is set; guarded by _locker
        private bool _disposed;  // guarded by _locker
        #endregion

        #region ctors
        /// <summary>
        /// Create a file streamer that returns every line of the file, empty lines included.
        /// </summary>
        /// <param name="path">File path</param>
        /// <exception cref="ArgumentException">The file does not exist.</exception>
        public FileStreamer(string path) : this(path, false, NoLimit)
        {
        }

        /// <summary>
        /// Create a file streamer instance
        /// </summary>
        /// <param name="path">File path</param>
        /// <param name="ignoreEmptyLines">Should the streamer skip empty (zero-length) lines</param>
        /// <param name="limit">Maximum number of lines the streamer should return. Send -1 for no limit</param>
        /// <exception cref="ArgumentException">
        /// The file does not exist, or <paramref name="limit"/> is neither -1 nor a positive number.
        /// </exception>
        public FileStreamer(string path, bool ignoreEmptyLines, int limit)
        {
            if (!File.Exists(path))
            {
                throw new ArgumentException("Cannot find the file: " + path);
            }
            if (limit != NoLimit && limit <= 0)
            {
                throw new ArgumentException("Limit must be bigger than 0 (or -1 for no limit) but was: " + limit + ". File given was: " + path);
            }

            _path = path;
            _ignoreEmptyLines = ignoreEmptyLines;
            _limit = limit;
            // Creating the enumerator does not open the file yet; that happens on the first MoveNext.
            _enumerator = CreateStream().GetEnumerator();
        }
        #endregion

        #region public API
        /// <summary>
        /// Get the next line of the file. Safe to call from several threads concurrently.
        /// </summary>
        /// <param name="nextItem">The line that was read, or null when nothing was read.</param>
        /// <returns>
        /// True when a line was read; false when the end of the file or the line limit was reached,
        /// or when the streamer has been disposed.
        /// </returns>
        public bool TryGetNextLine(out string nextItem)
        {
            lock (_locker)
            {
                return TryGetNextLineAssumingInsideLock(out nextItem);
            }
        }

        /// <summary>
        /// Get up to <paramref name="howMany"/> consecutive lines in one locked operation, so the
        /// returned lines are contiguous even when other threads read from the same streamer.
        /// </summary>
        /// <param name="nextItems">The lines that were read (never null; may be empty).</param>
        /// <param name="howMany">Maximum number of lines to read. Must be bigger than 0.</param>
        /// <returns>True when at least one line was read.</returns>
        /// <exception cref="ArgumentException"><paramref name="howMany"/> is not bigger than 0.</exception>
        public bool TryGetNextLines(out ICollection<string> nextItems, int howMany)
        {
            if (howMany <= 0)
            {
                throw new ArgumentException("'howMany' parameter must be > 0 but was " + howMany, "howMany");
            }

            // Cap the initial capacity: callers may pass a very large howMany to mean
            // "everything that is left", which must not allocate howMany slots up front.
            nextItems = new List<string>(Math.Min(howMany, MaxPreallocatedBatch));
            lock (_locker)
            {
                string nextItem;
                for (int i = 0; i < howMany; i++)
                {
                    if (!TryGetNextLineAssumingInsideLock(out nextItem))
                    {
                        break; // no more lines (EOF, limit reached or disposed)
                    }
                    nextItems.Add(nextItem);
                }
            }
            return nextItems.Count > 0;
        }

        /// <summary>
        /// Lazily enumerate every line of the file, empty lines included.
        /// </summary>
        /// <param name="path">File path</param>
        /// <returns>A sequence that reads the file as it is enumerated.</returns>
        public static IEnumerable<string> GetLines(string path)
        {
            return GetLines(path, false, NoLimit);
        }

        /// <summary>
        /// Lazily enumerate the lines of the file. Because this is an iterator, the arguments are
        /// validated (and the file is opened) only when enumeration starts, and the file is closed
        /// when the enumeration completes or its enumerator is disposed (as <c>foreach</c> does).
        /// </summary>
        /// <param name="path">File path</param>
        /// <param name="ignoreEmptyLines">Should empty (zero-length) lines be skipped</param>
        /// <param name="limit">Maximum number of lines to return. Send -1 for no limit</param>
        /// <returns>A sequence that reads the file as it is enumerated.</returns>
        public static IEnumerable<string> GetLines(string path, bool ignoreEmptyLines, int limit)
        {
            using (FileStreamer streamer = new FileStreamer(path, ignoreEmptyLines, limit))
            {
                string nextItem;
                while (streamer.TryGetNextLine(out nextItem))
                {
                    yield return nextItem;
                }
            }
        }

        ///<summary>
        /// Closes the underlying file (if it was opened). Safe to call more than once and safe to
        /// call while other threads are reading; subsequent reads report "no more lines".
        ///</summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }
        #endregion

        #region private API
        /// <summary>
        /// Get the next line in the file.
        /// dev: assume that the lock is from the outside, by the caller (this is why it's a private method)
        /// </summary>
        private bool TryGetNextLineAssumingInsideLock(out string nextItem)
        {
            nextItem = null;

            if (_disposed)
            {
                return false; // the enumerator is gone; behave like end of stream
            }

            bool limited = _limit != NoLimit;
            if (limited && _linesGiven >= _limit)
            {
                return false; // we reached the limit, no more please.
            }

            if (!_enumerator.MoveNext())
            {
                return false; // end of stream (EOF)
            }

            nextItem = _enumerator.Current;
            if (limited)
            {
                // Count only when it matters: an unconditional counter would overflow on
                // files with more than int.MaxValue lines and eventually collide with -1.
                _linesGiven++;
            }
            return true;
        }

        /// <summary>
        /// Iterator that opens the file for shared, sequential reading and yields its lines,
        /// skipping zero-length lines when the streamer was configured to do so. The file and
        /// reader are closed when the iterator finishes or is disposed.
        /// </summary>
        private IEnumerable<string> CreateStream()
        {
            using (FileStream fs = new FileStream(_path, FileMode.Open, FileAccess.Read, FileShare.Read, FileBufferSize, FileOptions.SequentialScan))
            using (StreamReader reader = new StreamReader(fs))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (_ignoreEmptyLines && line.Length == 0)
                    {
                        continue; // skip empty lines if needed
                    }
                    yield return line;
                }
            }
        }

        /// <summary>
        /// Releases the enumerator (and with it the open file) when called from <see cref="Dispose()"/>.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (!disposing)
            {
                return; // no unmanaged resources are held directly by this class
            }

            // Take the same lock as the readers: disposing an iterator while another thread is
            // inside MoveNext does not run its finally blocks, which would leave the file open.
            lock (_locker)
            {
                if (_disposed)
                {
                    return;
                }
                _disposed = true;
                _enumerator.Dispose();
            }
        }
        #endregion
    }
}