View or download trigram.frink in plain text format
// Finds the most common n-letter patterns (n-grams) in the text of a web page.
//
// Prompts for a URL and a gram length, fetches the page, reduces it to
// lowercase letters separated by single spaces, and prints each entry
// returned by getGrams on its own line.
default = "http://www.mindspring.com/~eliasen/twain/innocents/innocents.html"
url = input["Enter URL [Innocents Abroad HTML]: ", default]

// NOTE(review): eval on raw user input will evaluate any Frink expression the
// user types.  Kept so that existing usage is unchanged; consider a stricter
// integer parse if this is ever run with untrusted input.
len = eval[input["Enter character length: "]]

file = read[url]

// Replace each HTML tag with a space (not with nothing) so that the words on
// either side of a tag are not fused into one and counted as bogus grams.
file =~ %s/<[^>]*>/ /gs

// Replace HTML entities (&nbsp; &amp; &#8217; ...) with a space; otherwise the
// next step would keep their names ("nbsp", "amp") as if they were words.
file =~ %s/&#?\w+;/ /g

// Remove everything that is not a letter or whitespace.
file =~ %s/[^a-z\s]//gsi

// Collapse every run of whitespace into a single space.  \s also matches
// carriage returns and linefeeds, so no separate newline pass is needed.
file =~ %s/\s+/ /gs

file = lc[file]

print[join["\n",getGrams[file, len]]]
// Counts how often each substring of length len occurs in str.
//
// str: the text to scan.
// len: the number of characters in each gram.
//
// Returns an array of [gram, count] pairs sorted by ascending count (the most
// common grams come last).  Returns an empty array when len is less than 1 or
// greater than the length of str, since no gram of that size exists.
getGrams[str, len] :=
{
   fileLen = length[str]

   // Nothing to count: avoids tallying empty strings or running a degenerate loop.
   if ((len < 1) or (len > fileLen))
      return []

   grams = dict[]

   // The last gram starts at fileLen - len; the "to" bound is inclusive.
   for i = 0 to (fileLen - len)
   {
      sub = substrLen[str, i, len]

      // Uncomment this test to skip grams that contain whitespace, i.e. to
      // count only patterns that lie entirely inside a single word.
      // if (sub =~ %r/\s/)
      //    next

      // Adds 1 to the count for this gram, creating the entry if it is new.
      grams.increment[sub, 1]
   }

   // array[grams] yields [key, value] pairs; element 1 of each pair is the count.
   return sort[array[grams], { |a,b| a@1 <=> b@1 } ]
}
View or download trigram.frink in plain text format
This is a program written in the programming language Frink.
For more information, view the Frink
Documentation or see More Sample Frink Programs.
Alan Eliasen was born 18863 days, 3 hours, 36 minutes ago.