FileStreamer
Requirements:
- Be able to read all the lines in a given file.
- Be able to do so even if the file is HUGE ( == don’t load it all at once).
- Control the number of items I want to receive and whether or not the enumerator ignores empty lines. Always nice to have.
- Thread safety should be easy to support. Think about 50 threads, each reading the next line and processing it.
- Nice performance is a plus.
Playing with the API with my teammate Ron gave the following (the code was written in Notepad, so silly mistakes may keep it from compiling):
“Common foreach” usage:
foreach (string line in FileStreamer.GetLines(@"c:\temp\myfile.txt", true, 1000)) { /* .. code */ } // reads up to 1000 lines from the file while ignoring empty lines.
Reading from multiple threads usage:
using (FileStreamer streamer = new FileStreamer(@"c:\temp\myfile.txt", true, -1)) // -1 means no limit, read all non-empty lines
{
Thread[] threads = new Thread[10];
for(int i=0; i<threads.Length; i++)
{
threads[i] = new Thread((ThreadStart)delegate {
string line;
if (!streamer.TryGetNextLine(out line)) // thread safe!
return; // end of file, we can exit
// do work …
});
threads[i].Start();
}
// join the threads + whatever …
}
After reading a few ideas on Stack Overflow, I thought I would share my solution:
// written by bogen (30/12/2008)
#region using
using System;
using System.Collections.Generic;
using System.IO;
#endregion
namespace Semingo.Common.Utils
{
/// <summary>
/// Streams the lines of a text file one at a time, so the file is never loaded into memory as a whole.
/// Instance members (<see cref="TryGetNextLine"/>, <see cref="TryGetNextLines"/> and <see cref="Dispose()"/>)
/// are serialized through a single private lock, so one instance can be shared between threads.
/// Use the static method FileStreamer.GetLines for single-threaded usage (via foreach).
/// </summary>
public class FileStreamer : IDisposable
{
    #region constants
    /// <summary>Value of the limit parameter that means "return every line".</summary>
    private const int NoLimit = -1;

    /// <summary>Buffer size, in bytes, handed to the underlying <see cref="FileStream"/>.</summary>
    private const int FileBufferSize = 1024;

    /// <summary>
    /// Upper bound for the initial capacity of the list built by <see cref="TryGetNextLines"/>.
    /// The list still grows on demand; this only prevents a huge 'howMany' from allocating up front.
    /// </summary>
    private const int MaxInitialBatchCapacity = 1024;
    #endregion

    #region fields
    private readonly object _locker = new object();
    private readonly string _path;
    private readonly bool _ignoreEmptyLines;
    private readonly int _limit;
    private readonly IEnumerator<string> _enumerator;
    private int _linesGiven; // only maintained when a limit is set (guarded by _locker)
    private bool _disposed;  // guarded by _locker
    #endregion

    #region ctors
    /// <summary>
    /// Create a file streamer instance that returns every line (empty ones included) with no limit.
    /// </summary>
    /// <param name="path">File path</param>
    /// <exception cref="ArgumentException">The file does not exist.</exception>
    public FileStreamer(string path) : this(path, false, NoLimit)
    {
    }

    /// <summary>
    /// Create a file streamer instance.
    /// The file itself is not opened here; it is opened when the first line is requested.
    /// </summary>
    /// <param name="path">File path</param>
    /// <param name="ignoreEmptyLines">Should the streamer avoid empty lines</param>
    /// <param name="limit">Number of maximum lines the streamer should return. Send -1 for no limit</param>
    /// <exception cref="ArgumentException">
    /// The file does not exist, or <paramref name="limit"/> is neither -1 nor a positive number.
    /// </exception>
    public FileStreamer(string path, bool ignoreEmptyLines, int limit)
    {
        if (!File.Exists(path))
            throw new ArgumentException("Cannot find the file: " + path, "path");
        if (limit != NoLimit && limit <= 0)
            throw new ArgumentException("Limit must be bigger than 0 (or -1 for no limit) but was: " + limit + ". File given was: " + path, "limit");

        _path = path;
        _ignoreEmptyLines = ignoreEmptyLines;
        _limit = limit;
        // CreateStream is an iterator, so nothing is opened until the first MoveNext().
        _enumerator = CreateStream().GetEnumerator();
    }
    #endregion

    #region public API
    /// <summary>
    /// Get the next line of the file. Safe to call from several threads at once.
    /// </summary>
    /// <param name="nextItem">The line that was read, or null when no line is returned.</param>
    /// <returns>
    /// true if a line was read; false when the end of the file or the limit was reached,
    /// or when the streamer has been disposed.
    /// </returns>
    public bool TryGetNextLine(out string nextItem)
    {
        lock (_locker)
        {
            return TryGetNextLineAssumingInsideLock(out nextItem);
        }
    }

    /// <summary>
    /// Get up to <paramref name="howMany"/> consecutive lines as one batch.
    /// The whole batch is read under a single lock acquisition, so the lines of a batch are contiguous.
    /// </summary>
    /// <param name="nextItems">The lines that were read; empty (never null) when nothing was left.</param>
    /// <param name="howMany">Maximum number of lines to read. Must be bigger than 0.</param>
    /// <returns>true if at least one line was read, false otherwise.</returns>
    /// <exception cref="ArgumentException"><paramref name="howMany"/> is not bigger than 0.</exception>
    public bool TryGetNextLines(out ICollection<string> nextItems, int howMany)
    {
        if (howMany <= 0)
            throw new ArgumentException("'howMany' parameter must be > 0 but was " + howMany, "howMany");

        // Cap the pre-allocation: a caller may pass a very large number just to drain the file,
        // and reserving that many slots up front could fail before a single line is read.
        List<string> batch = new List<string>(Math.Min(howMany, MaxInitialBatchCapacity));
        lock (_locker)
        {
            string nextItem;
            for (int i = 0; i < howMany; i++)
            {
                if (!TryGetNextLineAssumingInsideLock(out nextItem))
                    break; // no more lines (EOF, limit reached or disposed)
                batch.Add(nextItem);
            }
        }

        nextItems = batch;
        return batch.Count > 0;
    }

    /// <summary>
    /// Lazily enumerate every line of the file (empty lines included, no limit).
    /// </summary>
    /// <param name="path">File path</param>
    /// <returns>A lazy sequence of the file's lines.</returns>
    public static IEnumerable<string> GetLines(string path)
    {
        return GetLines(path, false, NoLimit);
    }

    /// <summary>
    /// Lazily enumerate the lines of the file. Intended for single-threaded use via foreach.
    /// Because this method is an iterator, the streamer is only created (and its arguments only
    /// validated) when enumeration starts, not when this method is called. The streamer is
    /// disposed when enumeration finishes or the enumerator is disposed.
    /// </summary>
    /// <param name="path">File path</param>
    /// <param name="ignoreEmptyLines">Should empty lines be skipped</param>
    /// <param name="limit">Maximum number of lines to return. Send -1 for no limit</param>
    /// <returns>A lazy sequence of the file's lines.</returns>
    public static IEnumerable<string> GetLines(string path, bool ignoreEmptyLines, int limit)
    {
        using (FileStreamer streamer = new FileStreamer(path, ignoreEmptyLines, limit))
        {
            string nextItem;
            while (streamer.TryGetNextLine(out nextItem))
                yield return nextItem;
        }
    }

    ///<summary>
    /// Releases the underlying enumerator (and with it the open file, if any).
    /// Safe to call more than once; after disposal the TryGet* methods return false.
    ///</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }
    #endregion

    #region private API
    /// <summary>
    /// Get the next line in the file.
    /// dev: assume that the lock is from the outside, by the caller (this is why it's a private method)
    /// </summary>
    /// <param name="nextItem">The line that was read, or null when false is returned.</param>
    /// <returns>true if a line was read, false on EOF, limit reached or disposed streamer.</returns>
    private bool TryGetNextLineAssumingInsideLock(out string nextItem)
    {
        nextItem = null;

        // Never touch the enumerator once it has been disposed.
        if (_disposed)
            return false;

        bool limited = _limit != NoLimit;
        if (limited && _linesGiven >= _limit)
            return false; // we reached the limit, no more please.

        if (!_enumerator.MoveNext())
            return false; // end of stream (EOF)

        nextItem = _enumerator.Current;

        // Count only when a limit is set: an always-running counter could overflow on a huge
        // file and eventually collide with the "no limit" marker.
        if (limited)
            _linesGiven++;
        return true;
    }

    /// <summary>
    /// Iterator that opens the file on first use and yields its lines one by one,
    /// skipping empty lines when configured to. The file is closed when the iterator
    /// reaches the end of the file or is disposed.
    /// </summary>
    private IEnumerable<string> CreateStream()
    {
        using (FileStream fs = new FileStream(_path, FileMode.Open, FileAccess.Read, FileShare.Read, FileBufferSize, FileOptions.SequentialScan))
        using (StreamReader reader = new StreamReader(fs))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (_ignoreEmptyLines && line.Length == 0)
                    continue; // skip empty lines if needed
                yield return line;
            }
        }
    }

    /// <summary>
    /// Core of the dispose pattern.
    /// </summary>
    /// <param name="disposing">
    /// true when called from <see cref="Dispose()"/>; false when called from a finalizer
    /// (this class declares none, but a derived class may).
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            // Finalizer path: managed objects must not be touched, just record the state.
            _disposed = true;
            return;
        }

        // Take the same lock as the readers so the enumerator is never disposed
        // while another thread is in the middle of reading a line from it.
        lock (_locker)
        {
            if (_disposed)
                return;
            _enumerator.Dispose();
            _disposed = true;
        }
    }
    #endregion
}
}