Amazon.frink

Download or view Amazon.frink in plain text format


class Amazon
{
   // Category objects still waiting to have their page fetched.
   // processQueue takes from the front; addQueue adds with push.
   var queue = new array

   // Integer category IDs that have already been queued once, so that no
   // category is processed twice.
   var seen = new set

   // Lookup table:  category ID -> Category object
   var hierarchy = new dict

   // Works through the queue until it is empty.  Each Category is removed
   // from the front of the queue and its bestseller page is fetched via
   // readPage, which may in turn add more categories to the queue.
   processQueue[] :=
   {
      while length[queue] > 0
      {
         nextCat = queue.popFirst[]
         pageURL = "http://www.amazon.com/" + nextCat.urlpart + "/zgbs/books/" + nextCat.id + "/"
         readPage[pageURL, nextCat.id]

         // Debugging aids:
//         println["processing " + nextCat]
//         println[hierarchy]
//         println[length[queue] + " items in queue."]
      }
   }

   // Fetches the page at url (decoded as windows-1252), finds every link of
   // the form  http://www.amazon.com/<urlpart>/zgbs/books/<digits>...  and
   // hands each one to addQueue with parentID as its parent.
   //
   //   url:       the address of the page to fetch
   //   parentID:  the ID of the category whose page this is (the top-level
   //              script passes undef for the starting page.)
   readPage[url, parentID] :=
   {
      html = read[url, "windows-1252"]
//      println[html]

      // Each match yields three captures:  the URL part, the numeric category
      // ID (as a string), and the link text.
      // An earlier version of this pattern required a slash after the ID:
      // %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)\/[^"']*?['"]\s*>([^<]+)/g
      links = html =~ %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)[^'"]*['"]\s*>([^<]+)/g

      for [urlpart, idText, title] = links
      {
         addQueue[urlpart, parseInt[idText], parentID, title]
      }
   }

   // Registers a newly-found category and queues it for processing.
   // Does nothing if the category has been seen before or if it is its own
   // parent.  Otherwise it prints one tab-separated line
   // (id, parent id, full title), where the full title is the parent's
   // title (if the parent is known) followed by " | " and this title.
   //
   //   urlpart:   the URL path segment captured from the link
   //   id:        the category ID (as integer)
   //   parentID:  the ID of the category on whose page the link was found
   //   title:     the link text for this category
   addQueue[urlpart, id, parentID, title] :=
   {
      if seen.contains[id]
         return

      if id == parentID
         return

      seen.put[id]
//      println["Adding $title"]

      // Start with the bare title and prepend the parent's title when the
      // parent is already in the hierarchy.
      fullTitle = title
      parent = hierarchy@parentID
      if parent != undef
      {
         fullTitle = parent.title + " | " + title
      }

      cat = new Category[urlpart, id, parentID, fullTitle]
      hierarchy@id = cat
      queue.push[cat]
      println["$id\t$parentID\t$fullTitle"]
   }
}

// A single category found while crawling.  This is a plain record of the
// four values that Amazon.addQueue passes to the constructor.
class Category
{
   // The URL path segment used by Amazon.processQueue to build this
   // category's page address.
   var urlpart

   // The category's ID (Amazon.addQueue passes this as an integer.)
   var id

   // The ID of the category on whose page this one was found.
   var parentID

   // The title.  Amazon.addQueue passes the full title, which includes the
   // parent's title separated by " | " when the parent is known.
   var title

   // Constructor:  stores the four arguments in the corresponding fields.
   new[urlSegment, categoryID, parentCategoryID, fullTitle] :=
   {
      title    = fullTitle
      parentID = parentCategoryID
      id       = categoryID
      urlpart  = urlSegment
   }
}

// Start the crawl from the top-level books bestsellers page (which has no
// parent category, so undef is passed), then process everything that page
// queued until the queue is empty.
crawler = new Amazon
startURL = "http://www.amazon.com/gp/bestsellers/books/ref=sv_b_2"
crawler.readPage[startURL, undef]
crawler.processQueue[]
//println[crawler.hierarchy]


Download or view Amazon.frink in plain text format


This is a program written in the programming language Frink.
For more information, view the Frink Documentation or see More Sample Frink Programs.

Alan Eliasen was born 19970 days, 11 hours, 17 minutes ago.