trigram.frink

Download or view trigram.frink in plain text format


// Finds most common n-letter patterns in words.
//
// Prompts for a URL and a pattern length, fetches the page, reduces it to
// lowercase letters separated by single spaces, and prints one line per
// distinct pattern as returned by getGrams.

default = "https://futureboy.us/twain/innocents/innocents.html"
url = input["Enter URL [Innocents Abroad HTML]: ", default]

// NOTE(review): eval[] evaluates whatever expression the user types, not
// just a plain integer.  That is convenient (e.g. "2+1") but means the
// input is executed as Frink code -- confirm this is acceptable if the
// script is ever fed untrusted input.
len = eval[input["Enter character length: "]]

file = read[url]

file =~ %s/<[^>]*>//gs       // Strip HTML tags.
// Replace HTML entities (&nbsp; &mdash; &#8212; ...) with a space.  This must
// happen before non-letters are removed; otherwise the entity names survive
// as stray letters ("nbsp", "mdash") and pollute the pattern counts.
file =~ %s/&#?\w+;/ /gs
file =~ %s/[^a-z\s]//gsi     // Remove non-letter characters.
file =~ %s/\s+/ /gs          // Turn runs of whitespace (including linefeeds) into a single space.
file = lc[file]

print[joinln[getGrams[file, len]]]

// Counts every substring of exactly len characters in str.
//
// Arguments:
//   str: the text to scan.
//   len: the number of characters in each pattern.
//
// Returns an array of [pattern, count] pairs sorted by count (column 1).
// Returns an empty array when len is less than 1.
getGrams[str, len] :=
{
   // A zero length would only count empty strings, and a negative length
   // would push the loop's upper bound past the end of the string.
   if (len < 1)
      return new array

   fileLen = length[str]
   grams = new dict

   // i is the zero-based start of each window.  The upper bound is
   // inclusive, so the last window starts at fileLen - len.
   for i = 0 to (fileLen - len)
   {
      sub = substrLen[str, i, len]

      // Uncomment the next two lines to skip patterns that contain
      // whitespace.  While they are commented out, patterns that include
      // spaces are counted too.
//      if (sub =~ %r/\s/)
//         next

      grams.increment[sub, 1]
   }

   return sort[array[grams], byColumn[1] ]
}


Download or view trigram.frink in plain text format


This is a program written in the programming language Frink.
For more information, view the Frink Documentation or see More Sample Frink Programs.

Alan Eliasen was born 19945 days, 5 hours, 11 minutes ago.