trigram.frink

View or download trigram.frink in plain text format


// Finds most common n-letter patterns in words.

// Default source text: the "Innocents Abroad" HTML page named in the prompt.
default = "http://www.mindspring.com/~eliasen/twain/innocents/innocents.html"
url = input["Enter URL [Innocents Abroad HTML]: ", default]

// NOTE(review): eval[] evaluates whatever the user types as a Frink
// expression.  That is convenient for an interactive script, but do not feed
// it untrusted input; parse the text as a plain integer instead if this is
// ever run non-interactively.
len = eval[input["Enter character length: "]]

// Fetch the whole document as a single string.
file = read[url]

// Reduce the document to lowercase letters separated by single spaces.
// The order of these passes matters: tags go first (their attributes may
// contain entities), then entities, then everything that is not a letter.
file =~ %s/<[^>]*>//gs         // Strip HTML tags.
file =~ %s/&[^\s;&<>]+;/ /g    // Replace HTML entities (&amp; &nbsp; ...) with a
                               // space so their names are not counted as text.
file =~ %s/[^a-z\s]//gsi       // Remove non-letter characters.
file =~ %s/\s+/ /gs            // Collapse each whitespace run (line breaks
                               // included) into a single space.
file = lc[file]

// Print one entry per line, in the order returned by getGrams.
print[join["\n",getGrams[file, len]]]

// Counts how often each run of len consecutive characters occurs in str.
//
// Parameters:
//    str: the text to scan.
//    len: the number of characters in each pattern.
//
// Returns: the dictionary of counts converted to an array and sorted in
// ascending order of count (element 1 of each entry), so the most common
// patterns come last.  Returns an empty array when len is less than 1 or
// longer than str, because no pattern of that size can be taken.
getGrams[str, len] :=
{
   fileLen = length[str]

   // Guard against window sizes that cannot produce any pattern.
   if (len < 1 || len > fileLen)
      return []

   grams = dict[]

   // Slide a window of len characters over every starting position; the
   // last valid start is fileLen - len.
   for i = 0 to (fileLen - len)
   {
      sub = substrLen[str, i, len]

      // Uncomment this test to skip patterns that contain whitespace.
//      if (sub =~ %r/\s/)
//         next

      // Adds 1 to the count for this pattern, creating the entry on first use.
      grams.increment[sub, 1]
   }

   return sort[array[grams], { |a,b| a@1 <=> b@1 } ]
}


View or download trigram.frink in plain text format


This is a program written in the programming language Frink.
For more information, view the Frink Documentation or see More Sample Frink Programs.

Alan Eliasen was born 17651 days, 5 hours, 22 minutes ago.