Discussions with several authors since the launch of the site has often touched the matter of testing multiple files and the TAR issue. Some authors have difficulty before they can implement their own file analysis (sorting order) with a rather banal task: supporting elementary archiver framework in their program.
Here is a public domain solution for the problem. A C++ template that you can use for your programs.
int main(int argc, const char **argv) { Walker<FileIterator<Filename> > w(std::for_each(argv + 1, argv + argc, Walker<FileIterator<Filename> >().Recurse())); std::sort(w.begin(), w.end()); std::copy(w.begin(), w.end(), std::ostream_iterator<std::string>(std::cout, "\n")); return 0; }Let's test it.
sami@q6600:~/archiver_template> g++ -Wall minimal_filelist.cpp -o minimal_filelist sami@q6600:~/archiver_template> ./minimal_filelist 'm*' 'p*' 'w*' minimal_filelist.cpp minimal_filelist.cpp~ walker.h posix_file_iterator.h mask.h mydir/somefile.h mask.h~ posix_file_iterator.h~ walker.h~ minimal_filelist mydir/another_dir/file1 mydir/another_dir/file2The Filename looks like this:
struct Filename: std::string { typedef std::string NameT; Filename& Name(const NameT &s) { static_cast<NameT &>(*this) = s; return *this; } Filename& Size(uint64_t n) { return *this; } //we don't need this now const NameT& Name() const { return *this; } NameT Extension() const { size_type k = find_last_of('.'); return k == npos ? c_str() : substr(k + 1); } bool operator< (const Filename &b) const { return Extension() < b.Extension(); } };
template<class Iter> class Walker: public std::vector<typename Iter::TypeT> { typedef typename Iter::TypeT::NameT S; bool recurse; void Walk(Iter f, const Mask<S> &m = Mask<S>()) { while (++f) m != f.FileName() || (f.IsFile() && (push_back(f), true)) || (f.IsDirectory() && recurse && (Walk(f.Name()), true)); } public: Walker(): recurse(false) {} Walker& Recurse(bool r = true) { recurse = r; return *this; } Walker& operator() (const PathAndMask<S> &pm) { Walk(pm.P(), pm.M()); return *this; } Walker& operator() (const char *s) { return (*this)(S(s)); } };
c:*which doesn't refer to all files beginning with
c:but to a wholly different place. This is why we need platform specific way to split path and mask.
We shall attempt to provide fairly universal solution, by handling the drive letters even for non-Windows platforms:
template<class S> struct PathAndMask { S p; Mask<S> m; PathAndMask(S s): p(s) { std::replace(p.begin(), p.end(), '\\', '/'); RelativeDir(p) && (p += "/*", true); typename S::size_type k = p.find_last_of('/'); Mask<S>("?:*") == p && (k == 2 && (p.insert(2, "/."), k += 2) || k == p.npos && (k = (RelativeDir(p.substr(2)) ? p.length() : 2), p.insert(2, "."), true)); m = (k != p.npos ? p.substr(k + 1) : p); Mask<S>("?:") == p.erase(k != p.npos ? k + !k : 0) && (p += '.', true); m.empty() && (m = Mask<S>(), true); } const S& P() const { return p; } const Mask<S>& M() const { return m; } };
template<class T> class FileIterator: public T, private stat,dirent { DIR *dir; const typename T::NameT path; void Set() { T::Name(path + FileName()), ::stat(T::Name().c_str(), this), T::Size(st_size); } bool DE(dirent *de) { return de && ((static_cast<dirent &>(*this) = *de), Set(), true); } public: typedef T TypeT; FileIterator(const typename T::NameT &s): dir(opendir(s.empty() ? "." : s.c_str())), path(s.empty() || s == "/" ? s : s + '/') {} ~FileIterator() { dir && closedir(dir); } bool operator++ () { return dir && DE(readdir(dir)); } bool IsFile() const { return S_ISREG(st_mode); } bool IsDirectory() const { return S_ISDIR(st_mode) && !RelativeDir(FileName()); } typename T::NameT FileName() const { return typename T::NameT(d_name); } };The FileIterator can support storing data such as file timestamps, permissions, etc. just by adding couple of more lines to the FileIterator::Set() function. With trivial modifications we can use FindFirstFile() and FindNextFile() instead for MS-Windows, having operator++ call FindFirstFile() when called for the first time.
template<class T> struct Mask: T { Mask(const T &s = "*"): T(s) {} bool operator== (const T &s) const { return !fnmatch(T::c_str(), s.c_str(), 0); } bool operator!= (const T &s) const { return !(*this == s); } };For non-POSIX platforms such as MS-Windows, we need to implement our own fnmatch().
inline static int fnmatch(const char *m, const char *s, int dummy = 0) { if (*m == '*') for (++m; *s; ++s) if (!fnmatch(m, s)) return 0; return (!*s || !(*s == *m || *m == '?')) ? *m | *s : fnmatch(++m, ++s); }For Windows we need to disable the MinGW argument globbing:
#ifdef __MINGW32__ int _CRT_glob; //disable argument globbing for mingw32 #endif
int main(int argc, const char **argv) { Arguments a(argc, argv); if (a.size() < 2) throw std::string("usage..."); typedef std::string FileNameType; FileNameType archive(a.Pop()); RemoveIfExists(archive); UI ui(a.Bool("-v")); bool recurse(a.Bool("-r")); CompressionMethod method(a.Num("-m")); typedef Walker<FileIterator<FileProperties<FileNameType> > > W; W w (std::for_each(a.begin(), a.end(), W().Recurse(recurse))); if (w.empty()) throw std::string("no files found"); std::sort(w.begin(), w.end()); FileOut out(archive); std::for_each(w.begin(), w.end(), Archiver(out << method, ui, *std::auto_ptr<Compressor>(method()))).Done(); return 0; }A simple class to handle arguments given from the command line.
struct Arguments: std::vector<std::string> { Arguments(int argc, const char **argv) { std::copy(argv + 1, argv + argc, std::inserter(*this, begin())); } value_type Pop() { if (empty()) return value_type(); value_type s(front()); return erase(begin()), s; } bool Bool(const value_type &s) { std::vector<std::string>::iterator i = std::find(begin(), end(), s); return i != end() && (erase(i), true); } struct SubstrMatch: std::string { SubstrMatch (const std::string &s): std::string(s) {} bool operator() (const std::string &b) const { return b.substr(0, size()) == *this; } }; template<class T> bool Num(T &n, const value_type &s) { std::vector<value_type>::iterator i = std::find_if(begin(), end(), SubstrMatch(s)); return i != end() && (std::istringstream(i->substr(s.size())) >> n, erase(i), true); } unsigned Num(const value_type &s, unsigned default_value = 0) { unsigned k = default_value; return Num(k, s), k; } };
struct Archiver { std::ostream &os; uint64_t num, sum; UI &ui; Compressor ∁ Archiver(std::ostream &o, UI &u, Compressor &c): os(o), num(0), sum(0), ui(u), comp(c) {} template<class T> void operator() (const T &k) { ui.Processing(k); FileIn in(k.Name()); ++num, sum += k.Size(); comp(os << k, in, k.Size()); if (in.bad()) throw std::string("problem reading ") + k.Name(); if (os.bad()) throw std::string("problem writing output"); } Archiver& Done() { ui.Done(num, sum); return *this; } };
struct Compressor { virtual void operator () (std::ostream &out, std::istream &in, uint64_t n) { throw std::string("compressor not implemented"); } virtual ~Compressor() {} }; struct CopyCompressor: Compressor { void operator() (std::ostream &out, std::istream &in, uint64_t n) { while (n) out.put(in.get()), --n; } };
int main(int argc, const char **argv) { Filelist a(argc, argv); if (a.empty()) throw std::string("usage...); FileIn in(a.Pop()); if (a.empty()) throw std::string("no files defined"); CompressionMethod method; in >> method; UI ui(a.Bool("-v")); Extractor(ui).operator()<FileProperties<std::string> >(in, a, *std::auto_ptr<Compressor>(method())).Done(); return 0; }Files that are extracted are given from the command line, so we implement additional functionality for the argument class.
struct Filelist: Arguments { Filelist(int argc, const char **argv): Arguments(argc, argv) {} struct MaskMatch: std::string { MaskMatch(const std::string &a): std::string(a) {} bool operator() (const Mask<std::string> &m) const { return m == *this; } }; bool PopMask (const std::string &s) { return Bool(s) || std::find_if(begin(), end(), MaskMatch(s)) != end(); } };
struct Extractor { uint64_t num, sum; UI &ui; Extractor(UI &u): num(0), sum(0), ui(u) {} template<class T> Extractor& operator() (std::fstream &in, Filelist &a, Compressor &comp) { for (T k; a.size() && (in >> k).good(); ) { bool ok = a.PopMask(k.Name()); ok ? ui.Processing(k) : ui.Skip(k); ok && (RemoveIfExists(k.Name()), CreatePathForFile(k.Name()), sum += k.Size(), ++num); File o(ok ? k.Name() : "", std::ios::out); comp(o, in, k.Size()); } return *this; } Extractor& Done() { ui.Done(num, sum); return *this; } };
sami@q6600:~/archiver_template> g++ -Wall archiver.cpp -o archiver sami@q6600:~/archiver_template> ./archiver /tmp/k -r -v 'm*' file /tmp/k already exist, overwrite? (y/n)y minimal_filelist.cpp minimal_filelist.cpp~ mask.h mydir/somefile.h mask.h~ minimal_filelist mydir/another_dir/file1 mydir/another_dir/file2 56189 bytes, 8 files. sami@q6600:~/archiver_template> g++ -Wall extractor.cpp -o extractor sami@q6600:~/archiver_template> md kk sami@q6600:~/archiver_template> cd kk sami@q6600:~/archiver_template/kk> touch mask.h sami@q6600:~/archiver_template/kk> ../extractor /tmp/k -v '*.h' skipping file minimal_filelist.cpp skipping file minimal_filelist.cpp~ mask.h file mask.h already exist, overwrite? (y/n)y mydir/somefile.h skipping file mask.h~ skipping file minimal_filelist skipping file mydir/another_dir/file1 skipping file mydir/another_dir/file2 1614 bytes, 2 files. sami@q6600:~/archiver_template/kk> ls -R .: mask.h mydir ./mydir: somefile.h sami@q6600:~/archiver_template/kk> diff -s mask.h ../mask.h Files mask.h and ../mask.h are identical
Update 2009-05-06: the code contains bugs: no static keyword should be at static int _CRT_glob;
for MinGW, the Extractor(ui)... line should end with .Done(); and there are missing brackets in the ok && ...
line at Extractor. (These bugs are fixed at the quoted code in this article.) Also some of the code is redundant. Also note that the programs are case-sensitive (when globbing files).
Let me know if you found this code useful or made improvements to it. Write to sami (at) compressionratings.com
Thanks to Eugene Shelwien for feedback and benchmarks.