trigram.frink

Download or view trigram.frink in plain text format


// Finds the most common n-letter patterns (n-grams) in the text of a web page.
//
// Prompts for a URL and an n-gram length, fetches the page, reduces it to
// lowercase letters separated by single spaces, and prints one
// [pattern, count] pair per line as returned by getGrams.

default = "https://futureboy.us/twain/innocents/innocents.html"
url = input["Enter URL [Innocents Abroad HTML]: ", default]

// NOTE(review): eval[] evaluates whatever expression the user types, not just
// an integer.  That is acceptable for an interactive script, but do not feed
// this prompt from untrusted input.
len = eval[input["Enter character length: "]]

file = read[url]

file =~ %s/<[^>]*>//gs      // Strip HTML tags.
// Replace character entities (&amp; &nbsp; &#8212; ...) with a space.  This
// must happen before the non-letter strip below; otherwise only the "&" and
// ";" are removed and the entity names ("amp", "nbsp", ...) are counted as if
// they were part of the text.
file =~ %s/&#?\w+;/ /g
file =~ %s/[^a-z\s]//gsi    // Remove non-letter characters.
file =~ %s/[\r\n]/ /gs      // Turn linefeeds into spaces.
file =~ %s/\s+/ /gs         // Turn multiple spaces into single space.
file = lc[file]

print[joinln[getGrams[file, len]]]

// Tallies every len-character substring of str.
//
//   str: the text to scan.
//   len: the number of characters in each pattern.
//
// Returns an array of [pattern, count] pairs sorted on the count column.
getGrams[str, len] :=
{
   tally = new dict

   // Last starting offset at which a full len-character pattern still fits.
   lastPos = length[str] - len

   for pos = 0 to lastPos
   {
      ngram = substrLen[str, pos, len]

      // Patterns containing whitespace are currently counted.  Uncomment the
      // two lines below to skip any pattern that spans a space.
//      if (ngram =~ %r/\s/)
//         next

      tally.increment[ngram, 1]
   }

   // Convert the dictionary into [pattern, count] pairs and order them by
   // column 1 (the count).
   entries = array[tally]
   return sort[entries, byColumn[1]]
}


Download or view trigram.frink in plain text format


This is a program written in the programming language Frink.
For more information, view the Frink Documentation or see More Sample Frink Programs.

Alan Eliasen was born 19944 days, 11 hours, 48 minutes ago.