Monday, 13 October 2014

How to count Words, alphabets, numbers in a text file using Python

File handling is very easy in Python. It is flexible, which makes working with files friendly, and you get total control over your files.
Here I have written a small program that counts the number of words, alphabetic characters, and numeric characters in a file read from disk.
Four functions are used in total: isnumeric and isalpha are character-classification methods, len returns the size of a list, and split breaks one line of text into a list of words.

I am giving the code below, you can try this.

#!/usr/local/bin/python3.4
# Please follow the indentation exactly, as it is significant in Python. Python doesn't use begin/end, if/end if or
# loop/end loop, so it identifies statement blocks by indentation.

def WordCount(file):
    """Return the total number of whitespace-separated words in *file*.

    *file* is iterated one line at a time (an open text file works, as does
    any iterable of strings). Each line is split on whitespace with
    str.split() and the per-line word counts are added together.
    """
    return sum(len(line.split()) for line in file)

def patternCount(pat, source=None):
    """Return how many times *pat* occurs across all lines of a source.

    pat    -- substring to look for. Occurrences are counted line by line
              with str.count(), so matches are non-overlapping and a match
              never spans two lines.
    source -- optional iterable of lines (for example an open text file).
              When omitted, the module-level ``file`` object is read, so
              existing ``patternCount(pat)`` calls keep working.
    """
    # Fall back to the global file handle only when no source was supplied.
    lines = file if source is None else source
    return sum(line.count(pat) for line in lines)

def AlphaCount(file):
    """Return the number of alphabetic characters contained in *file*.

    *file* is iterated line by line, and every character of every line is
    tested with str.isalpha(); each character that passes adds one to the
    result.
    """
    total = 0
    for line in file:
        # Count the alphabetic characters of this line in one pass
        total += sum(1 for ch in line if ch.isalpha())
    return total

def NumCount(file):
    """Return the number of numeric characters contained in *file*.

    Every character of every line yielded by *file* is tested with
    str.isnumeric(); the result is the number of characters for which that
    test is true.
    """
    return sum(1 for line in file for ch in line if ch.isnumeric())

filename="wordCount.py"
# Open the file for reading. The with-block guarantees the file is closed even
# if one of the counting calls raises. The handle stays bound to the
# module-level name `file`, because patternCount reads that global when it is
# called with only a pattern.
with open(filename,"r") as file:

# prints name of the file by %s (filename) and %d (WordCount(file))
    print("Number of words in %s is %d"%(filename,WordCount(file)))

# Each counter consumes the file to the end, so rewind to the beginning
# before the next pass.
    file.seek(0,0)

# prints name of the file by %s (filename) and %d (AlphaCount(file))
    print("Number of alphabets in %s is %d"%(filename,AlphaCount(file)))

# Rewind again for the numeric-character pass
    file.seek(0,0)

# prints name of the file by %s (filename) and %d (NumCount(file))
    print("Number of numerics in %s is %d"%(filename,NumCount(file)))

# Rewind once more for the pattern count
    file.seek(0,0)

# prints how many times the substring "print" occurs in the file
    print("Patter print exists for %d times "%(patternCount("print")))

No comments: