A friend was looking for a way to list the space usage on a Windows server that only had FTP access. I had written something similar for a project long ago, so I polished it up to do the job.
This Python script walks an FTP directory tree in a top-down, depth-first pattern. It uses the ftplib library, which is part of the Python standard library. Configure the FTP_* variables near the top to set the server, port, user, password, and the delay between FTP operations (to avoid hammering the server). The script recursively processes directories, creating a dirStruct tuple that contains the following items:
(pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
pwd is a string like "/debian/dists/experimental"
subdirList is a list of tuples just like this one
fileList is a list of (filename, sizeInBytes) tuples
sizeInFilesHere is a sum of all the files in this directory
sizeTotal is a sum of all the files in this directory and all subdirectories
It also writes data to two CSV files:
- dirStruct_only_folders.csv
- Contains entries for just the directories.
- Local size is the total size of files in that folder (does not count subdirs).
- Total size is the sum of local size and total size of all subdirs.
- dirStruct_complete.csv
- Contains entries for both files and folders.
- Files do not have a total size, only a local size.
#!/usr/bin/env python
#
# A script to recursively walk an FTP server directory structure, recording information
# about the file and directory sizes as it traverses the folders.
#
# Stores output in two CSV files:
# dirStruct_only_folders.csv
# Contains entries for just the directories.
# Local size is the total size of files in that folder (does not count subdirs).
# Total size is the sum of local size and total size of all subdirs.
# dirStruct_complete.csv
# Contains entries for both files and folders.
# Files do not have a total size, only a local size.
#
# Customize the FTP_* variables below.
#
# Basically does a depth-first search.
#
# Written by Matthew L Beckler, matthew at mbeckler dot org.
# Released into the public domain, do whatever you like with this.
# Email me if you like the script or have suggestions to improve it.
from ftplib import FTP
from time import sleep
# Connection settings -- customize these before running the script.
FTP_SERVER = "ftp.debian.org" # host name handed to FTP.connect()
FTP_PORT = "21" # 21 is the default
FTP_USER = "" # leave empty for anon FTP server
FTP_PASS = "" # only sent when FTP_USER is non-empty
FTP_DELAY = 1 # seconds to sleep before each directory is processed, to go easy on the server
def parseListLine(line):
    """Parse one line of a Unix-style ("ls -l") FTP directory listing.

    Files look like:
        "-rw-r--r-- 1 1176 1176 176158 Mar 30 01:52 README.mirrors.html"
    Directories look like:
        "drwxr-sr-x 15 1176 1176 4096 Feb 15 09:22 dists"

    Returns a (name, isDir, sizeBytes) tuple.

    Raises IndexError if the line has fewer than nine whitespace-separated
    fields, and ValueError if the size field is not an integer.
    """
    # Split at most 8 times so the ninth field keeps the complete name,
    # including any embedded spaces ("my file.txt").
    items = line.strip().split(None, 8)
    mode, size, name = items[0], items[4], items[8]
    if mode[0] == "l":
        # Symlinks are listed as "name -> target"; keep only the link name.
        name = name.split(" -> ", 1)[0]
    return (name, mode[0] == "d", int(size))
# Since the silly ftp library makes us use a callback to handle each line of text from the server,
# we have a global lines buffer. Clear the buffer variable before doing each call.
lines = []


def appendLine(line):
    """Callback for ftp.dir(): store one raw listing line in the shared buffer."""
    # Looked up at call time, so this always targets whatever list the
    # module-level name `lines` is currently bound to.
    lines.append(line)
def getListingParsed(ftp):
    """Return the current FTP directory's listing as a list of parsed entries.

    This is a sensible interface to ftplib's callback-per-line system: the
    module-level `lines` buffer is reset, filled by ftp.dir() through
    appendLine, and every entry line is run through parseListLine.

    Returns a list of (name, isDir, sizeBytes) tuples. It is always a real
    list (never a lazy iterator), so callers may iterate it more than once.
    """
    global lines
    lines = []
    ftp.dir(appendLine)
    # Blank lines and the "total N" summary some servers send before the
    # entries are not directory entries and would make the parser fail.
    return [
        parseListLine(rawLine)
        for rawLine in lines
        if rawLine.strip() and not rawLine.startswith("total ")
    ]
def descendDirectories(ftp):
    """Recursively walk the FTP tree below the connection's current directory.

    Returns a tuple describing the current ftp directory:
        (pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
    pwd is a string like "/debian/dists/experimental"
    subdirList is a list of tuples just like this one
    fileList is a list of (filename, sizeInBytes) tuples
    sizeInFilesHere is a sum of all the files in this directory
    sizeTotal is a sum of all the files in this directory and all subdirectories

    The connection is back in the directory it started in when this returns.
    """
    sleep(FTP_DELAY)  # be a nice client

    pwd = ftp.pwd()
    subdirList = []
    fileList = []
    sizeInFilesHere = 0
    sizeTotal = 0
    print(pwd + "/")

    for name, isDir, sizeBytes in getListingParsed(ftp):
        if not isDir:
            fileList.append((name, sizeBytes))
            sizeInFilesHere += sizeBytes
            continue
        if name in (".", ".."):
            # Some servers list the self/parent entries; following them
            # would revisit the same directories forever.
            continue
        # A real subdirectory: step in, recurse, step back out.
        ftp.cwd(name)
        struct = descendDirectories(ftp)
        ftp.cwd("..")
        subdirList.append(struct)
        sizeTotal += struct[4]

    # add in the size of all files here to sizeTotal
    sizeTotal += sizeInFilesHere
    return (pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
def pprintBytes(b):
    """Pretty print a number of bytes with a binary suffix (K, M, G, T).

    The value is divided by 1024 until it drops below 1024 or the largest
    suffix is reached. Whole numbers are shown without decimals ("1K"),
    everything else with one decimal ("1.5K"). Values beyond the terabyte
    range stay expressed in T (for example "2048T").
    """
    suffixes = ["", "K", "M", "G", "T"]
    ix = 0
    # ">=" so that exactly 1024 becomes "1K"; stop at the last suffix so the
    # magnitude of very large values is never lost.
    while b >= 1024 and ix < len(suffixes) - 1:
        b /= 1024.0
        ix += 1
    if int(b) == b:
        return "%d%s" % (b, suffixes[ix])
    return "%.1f%s" % (b, suffixes[ix])
def pprintDirStruct(dirStruct):
    """Pretty print the directory structure. RECURSIVE FUNCTION!

    dirStruct is a (pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
    tuple. One summary line is printed for this directory, then for each
    entry of subdirList in order.
    """
    pwd, subdirList, fileList, sizeInFilesHere, sizeTotal = dirStruct[:5]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("{}/ ({} in {} files here, {} total)".format(
        pwd, pprintBytes(sizeInFilesHere), len(fileList), pprintBytes(sizeTotal)))
    for ds in subdirList:
        pprintDirStruct(ds)
def saveDirStructToCSV(dirStruct, fid, includeFiles):
    """Save the directory structure to a CSV file. RECURSIVE FUNCTION!

    dirStruct is a (pwd, subdirList, fileList, sizeInFilesHere, sizeTotal)
    tuple, fid is a writable text file object, and includeFiles selects
    whether one row per file is written in addition to the directory rows.

    Row format: "path",localSize,totalSize. Directory paths end in "/".
    File rows leave the total size column empty.
    """
    def quoted(path):
        # CSV escapes a double quote inside a quoted field by doubling it.
        return '"' + path.replace('"', '""') + '"'

    # Normalise the trailing slash so the root directory "/" is written as
    # "/" rather than "//" (and its files as "/name" rather than "//name").
    prefix = dirStruct[0].rstrip("/") + "/"

    # Info about this directory itself
    fid.write("{},{},{}\n".format(quoted(prefix), dirStruct[3], dirStruct[4]))

    # Info about files here
    if includeFiles:
        for name, size in dirStruct[2]:
            fid.write("{},{},\n".format(quoted(prefix + name), size))

    # Info about dirs here, recurse
    for ds in dirStruct[1]:
        saveDirStructToCSV(ds, fid, includeFiles)
# Script driver: connect, walk the tree, print a summary, write both CSVs.
print("Connecting to FTP server '%s' port %s..." % (FTP_SERVER, FTP_PORT))
ftp = FTP()
try:
    # ftplib documents the port as an integer; FTP_PORT is configured as a string.
    ftp.connect(FTP_SERVER, int(FTP_PORT))
    if FTP_USER == "":
        ftp.login()
    else:
        ftp.login(FTP_USER, FTP_PASS)

    print("Walking directory structure...")
    dirStruct = descendDirectories(ftp)
    ftp.quit()  # say goodbye politely once the walk succeeded
finally:
    # Always release the socket, even if the walk failed part-way.
    # close() is harmless after a successful quit().
    ftp.close()

print("")
print("Finished descending directories, here is the info:")
pprintDirStruct(dirStruct)
print("")

FILENAME = "dirStruct_complete.csv"
print("Saving complete directory info (files and folders) to a CSV file: '%s'" % FILENAME)
with open(FILENAME, "w") as fid:
    fid.write("\"Path\",\"Local size\",\"Total size\"\n")
    saveDirStructToCSV(dirStruct, fid, includeFiles=True)

FILENAME = "dirStruct_only_folders.csv"
print("Saving directory info (only folders) to a CSV file: '%s'" % FILENAME)
with open(FILENAME, "w") as fid:
    fid.write("\"Path\",\"Local size\",\"Total size\"\n")
    saveDirStructToCSV(dirStruct, fid, includeFiles=False)
Sample CSV output:
"Path","Local size","Total size"
"/plugins/",5426535,7594527
"/plugins/foo-1.1.jar",7774,
"/plugins/CHANGELOG.txt",45169,
Local size is just the size of the file itself, or the size of all files in a directory. Total size is the total size of the files in a directory plus the total sizes of all subdirectories. Files do not have a total size entry.