#! /usr/local/bin/python

# This script was written by Neil Macneale <mac4@theory.org>. It
# is in the public domain. I ain't responsible for shit.
#
# Requires python 2, but could easily be modified to run with 1.5
#
# To use this python script, you are first going to need to list a 
# python path in the first line of this file. Or you can call
# python directly.
# 
# Then you are going to need a file which lists the files you are
# interested in counting. I generated mine with the following
# command:
# find . -name "*" -follow | egrep '\.(c|h|py|pl|cpp)$' > files
#
# Then I ran this script which you are looking at.
# python count.py
#
# The end result is two files, 'singles.txt' and 'doubles.txt'
#
# Happy coding!!!

import string

# compare func for (key,val) pairs
def cmp(a, b):
    """Order two (key, val) pairs so the larger val sorts first.

    Returns -1 when a's value is greater than b's, 1 when it is
    smaller, and 0 when neither holds.
    """
    left, right = a[1], b[1]
    # Each comparison contributes 0 or 1; their difference is the
    # inverted three-way result.
    return (left < right) - (left > right)

# The set of characters to count: string.printable with its last six
# characters (the whitespace ones) sliced off. You may be interested
# in another set; change it here.
counting = string.printable[:-6]


def _new_tables(alphabet):
    """Return (singles, doubles) count tables with every key set to 0.

    Keys are upper-cased, so lower- and upper-case letters share one
    entry. `singles` has one key per character of `alphabet`; `doubles`
    has one key per ordered pair of characters.
    """
    singles = {}
    doubles = {}
    for first in alphabet:
        singles[first.upper()] = 0
        for second in alphabet:
            doubles[(first + second).upper()] = 0
    return singles, doubles


def _count_text(text, alphabet, singles, doubles):
    """Add the character and adjacent-pair counts of `text` to the tables.

    A pair is counted only when both characters are in `alphabet` and
    are directly adjacent in `text`. Pairs never span two calls.
    """
    previous = None  # the preceding character, only if it was counted
    for current in text:
        if current in alphabet:
            singles[current.upper()] += 1
            if previous is not None:
                doubles[(previous + current).upper()] += 1
            previous = current
        else:
            # Any uncounted character breaks the pair chain.
            previous = None


def _write_counts(path, counts):
    """Write `counts` to `path` as "key<TAB>count" lines, largest first."""
    # A stable sort with reverse=True keeps ties in their original
    # order, which is the same ordering a descending comparator gives.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    with open(path, "w") as out:
        for key, count in ranked:
            out.write(key + "\t" + str(count) + "\n")


def main():
    """Count characters in every file listed in 'files'.

    Reads one path per line from the file named 'files' (blank lines
    are skipped), prints a progress line per file, and writes the
    totals to 'singles.txt' and 'doubles.txt'.
    """
    with open("files") as listing:
        paths = [line.strip() for line in listing]
    # A blank line would otherwise turn into open("") and abort the run.
    paths = [path for path in paths if path]

    singles, doubles = _new_tables(counting)

    total = len(paths)
    for number, path in enumerate(paths, 1):
        # Read raw bytes and decode as latin-1: every byte maps to
        # exactly one character, so decoding cannot fail and bytes
        # outside the counted set are simply skipped.
        with open(path, "rb") as source:
            text = source.read().decode("latin-1")
        _count_text(text, counting, singles, doubles)
        print(path + "                Complete, " + str(number) + "/" + str(total))

    _write_counts("singles.txt", singles)
    _write_counts("doubles.txt", doubles)


if __name__ == "__main__":
    main()
