4199b9a26696

lols
[view raw] [browse files]
author Steve Losh <steve@stevelosh.com>
date Wed, 17 Aug 2016 15:25:07 +0000
parents 2cb0208c1744
children 37d71dad1f25
branches/tags (none)
files make-quickutils.lisp package.lisp quickutils.lisp sand.asd src/markov.lisp

Changes

--- a/make-quickutils.lisp	Wed Aug 17 03:29:14 2016 +0000
+++ b/make-quickutils.lisp	Wed Aug 17 15:25:07 2016 +0000
@@ -10,6 +10,7 @@
                :rcurry
                :n-grams
                :define-constant
+               :riffle
                ; :switch
                ; :while
                ; :ensure-boolean
--- a/package.lisp	Wed Aug 17 03:29:14 2016 +0000
+++ b/package.lisp	Wed Aug 17 15:25:07 2016 +0000
@@ -69,6 +69,7 @@
 (defpackage #:sand.markov
   (:use
     #:cl
+    #:cl-arrows
     #:losh
     #:iterate
     #:split-sequence
--- a/quickutils.lisp	Wed Aug 17 03:29:14 2016 +0000
+++ b/quickutils.lisp	Wed Aug 17 15:25:07 2016 +0000
@@ -2,7 +2,7 @@
 ;;;; See http://quickutil.org for details.
 
 ;;;; To regenerate:
-;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT) :ensure-package T :package "SAND.QUICKUTILS")
+;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE) :ensure-package T :package "SAND.QUICKUTILS")
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (unless (find-package "SAND.QUICKUTILS")
@@ -17,7 +17,7 @@
                                          :MAKE-GENSYM-LIST :ONCE-ONLY
                                          :ENSURE-FUNCTION :COMPOSE :CURRY
                                          :RCURRY :TAKE :N-GRAMS
-                                         :DEFINE-CONSTANT))))
+                                         :DEFINE-CONSTANT :RIFFLE))))
 
   (deftype string-designator ()
     "A string designator type. A string designator is either a string, a symbol,
@@ -241,8 +241,16 @@
     `(defconstant ,name (%reevaluate-constant ',name ,initial-value ,test)
        ,@(when documentation `(,documentation))))
   
+
+  (defun riffle (list obj)
+    "Insert the item `obj` in between each element of `list`."
+    (loop :for (x . xs) :on list
+          :collect x
+          :when xs
+            :collect obj))
+  
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (export '(with-gensyms with-unique-names once-only compose curry rcurry
-            n-grams define-constant)))
+            n-grams define-constant riffle)))
 
 ;;;; END OF quickutils.lisp ;;;;
--- a/sand.asd	Wed Aug 17 03:29:14 2016 +0000
+++ b/sand.asd	Wed Aug 17 15:25:07 2016 +0000
@@ -14,7 +14,15 @@
                #:split-sequence
                #:parenscript
                #:sketch
-               #:losh)
+               #:losh
+               #:drakma
+               #:yason
+               #:flexi-streams
+               #:sanitize
+               #:html-entities
+               #:plump
+               #:clss
+               )
 
   :serial t
   :components
--- a/src/markov.lisp	Wed Aug 17 03:29:14 2016 +0000
+++ b/src/markov.lisp	Wed Aug 17 15:25:07 2016 +0000
@@ -52,7 +52,6 @@
                    :database database
                    :beginnings (coerce beginnings 'vector))))
 
-
 (defun generate-sentence (markov)
   (iterate
     (repeat 50)
@@ -65,10 +64,123 @@
 
 
 
-(defparameter *m* (build-markov-generator *text* 2))
+(defun firebase-get (url)
+  (-> url
+    drakma:http-request
+    (flex:octets-to-string :external-format :utf-8)
+    yason:parse))
+
+(defun hn-top ()
+  (firebase-get "https://hacker-news.firebaseio.com/v0/topstories.json"))
+
+(defun hn-item (id)
+  (firebase-get
+    (format nil "https://hacker-news.firebaseio.com/v0/item/~d.json" id)))
+
+(defun hn-story (story-id)
+  (hn-item story-id))
+
+(defun hn-comment (story-id)
+  (hn-item story-id))
+
+(defun hn-text (comment)
+  (-> (gethash "text" comment)
+    sanitize:clean
+    html-entities:decode-entities))
+
+(defparameter *errors* 0)
+
+(defun hn-comments (story-id)
+  (iterate
+    (with story = (hn-story story-id))
+    (with children = (gethash "kids" story))
+    (repeat 50)
+    ; (sleep 0.1)
+    (while children)
+    (for child-id = (pop children))
+    (for child = (handler-case (hn-comment child-id)
+                   (drakma::drakma-simple-error () (incf *errors*) nil)))
+    (when child
+      (collect child)
+      (setf children (append children (gethash "kids" child))))))
+
+(defvar *hn* nil)
+
+(defun build-hn-corpus ()
+  (length (setf *hn* (-<> (hn-top)
+                       (take 15 <>)
+                       (mapcan #'hn-comments <>)
+                       (mapcar #'hn-text <>)
+                       (format nil "~{~a~%~}" <>)))))
 
 
-(iterate (repeat 10)
-         (terpri)
-         (terpri)
-         (princ (generate-sentence *m*)))
+(defun ratebeer-get (page)
+  (-<> (format nil "http://www.ratebeer.com/beer-ratings/0/~d/" page)
+    drakma:http-request
+    plump:parse))
+
+(defun ratebeer-clean (raw)
+  (-<> raw
+    (plump:get-elements-by-tag-name <> "table")
+    car
+    (plump:get-elements-by-tag-name <> "td")
+    (mapcar (rcurry #'plump:get-elements-by-tag-name "span") <>)
+    (remove-if-not #'identity <>)
+    (mapcar #'first <>)
+    (mapcar #'plump:text <>)))
+
+(defvar *beer* nil)
+
+(defun build-beer-corpus ()
+  (length
+    (setf *beer*
+          (iterate
+            (for page :from 1 :to 30)
+            (appending (ratebeer-clean (ratebeer-get page)) :into reviews)
+            (finally (return (format nil "~{~A~%~}" reviews)))))))
+
+
+
+(defun wine-get-list (page-number)
+  (-<> (format nil "http://www.winemag.com/?s=&drink_type=wine&page=~D"
+               page-number)
+    drakma:http-request
+    plump:parse))
+
+(defun wine-get-review (url)
+  (-<> url
+    drakma:http-request
+    plump:parse))
+
+
+(defun wine-clean-list (list-page)
+  (-<> list-page
+    (clss:select "a.review-listing" <>)
+    (map 'list (rcurry #'plump:attribute "href") <>)))
+
+(defun wine-clean-review (review-page)
+  (plump:text (elt (clss:select "#review .description" review-page) 0)))
+
+
+(defparameter *wine* nil)
+(defun build-wine-corpus ()
+  (length
+    (setf *wine*
+          (iterate
+            (for page :from 1 :to 10)
+            (for review-links = (wine-clean-list (wine-get-list page)))
+            (appending (mapcar (compose #'wine-clean-review #'wine-get-review)
+                               review-links)
+                       :into reviews)
+            (finally (return (format nil "~{~A~%~}" reviews)))))))
+
+(defparameter *m*
+  (build-markov-generator (concatenate 'string *hn* *wine*) 2))
+
+
+(iterate (repeat 50)
+         (for sentence = (generate-sentence *m*))
+         (when (<= (length sentence) 140)
+           (terpri)
+           (terpri)
+           (princ sentence)))