# HG changeset patch # User Steve Losh # Date 1471447507 0 # Node ID 4199b9a26696e8711c1b6fe1d9e63fdb4d442c0e # Parent 2cb0208c1744cb7ddf9cffaed235ee8f26a3bab0 lols diff -r 2cb0208c1744 -r 4199b9a26696 make-quickutils.lisp --- a/make-quickutils.lisp Wed Aug 17 03:29:14 2016 +0000 +++ b/make-quickutils.lisp Wed Aug 17 15:25:07 2016 +0000 @@ -10,6 +10,7 @@ :rcurry :n-grams :define-constant + :riffle ; :switch ; :while ; :ensure-boolean diff -r 2cb0208c1744 -r 4199b9a26696 package.lisp --- a/package.lisp Wed Aug 17 03:29:14 2016 +0000 +++ b/package.lisp Wed Aug 17 15:25:07 2016 +0000 @@ -69,6 +69,7 @@ (defpackage #:sand.markov (:use #:cl + #:cl-arrows #:losh #:iterate #:split-sequence diff -r 2cb0208c1744 -r 4199b9a26696 quickutils.lisp --- a/quickutils.lisp Wed Aug 17 03:29:14 2016 +0000 +++ b/quickutils.lisp Wed Aug 17 15:25:07 2016 +0000 @@ -2,7 +2,7 @@ ;;;; See http://quickutil.org for details. ;;;; To regenerate: -;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT) :ensure-package T :package "SAND.QUICKUTILS") +;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE) :ensure-package T :package "SAND.QUICKUTILS") (eval-when (:compile-toplevel :load-toplevel :execute) (unless (find-package "SAND.QUICKUTILS") @@ -17,7 +17,7 @@ :MAKE-GENSYM-LIST :ONCE-ONLY :ENSURE-FUNCTION :COMPOSE :CURRY :RCURRY :TAKE :N-GRAMS - :DEFINE-CONSTANT)))) + :DEFINE-CONSTANT :RIFFLE)))) (deftype string-designator () "A string designator type. A string designator is either a string, a symbol, @@ -241,8 +241,16 @@ `(defconstant ,name (%reevaluate-constant ',name ,initial-value ,test) ,@(when documentation `(,documentation)))) + + (defun riffle (list obj) + "Insert the item `obj` in between each element of `list`." + (loop :for (x . xs) :on list + :collect x + :when xs + :collect obj)) + (eval-when (:compile-toplevel :load-toplevel :execute) (export '(with-gensyms with-unique-names once-only compose curry rcurry - n-grams define-constant))) + n-grams define-constant riffle))) ;;;; END OF quickutils.lisp ;;;; diff -r 2cb0208c1744 -r 4199b9a26696 sand.asd --- a/sand.asd Wed Aug 17 03:29:14 2016 +0000 +++ b/sand.asd Wed Aug 17 15:25:07 2016 +0000 @@ -14,7 +14,15 @@ #:split-sequence #:parenscript #:sketch - #:losh) + #:losh + #:drakma + #:yason + #:flexi-streams + #:sanitize + #:html-entities + #:plump + #:clss + ) :serial t :components diff -r 2cb0208c1744 -r 4199b9a26696 src/markov.lisp --- a/src/markov.lisp Wed Aug 17 03:29:14 2016 +0000 +++ b/src/markov.lisp Wed Aug 17 15:25:07 2016 +0000 @@ -52,7 +52,6 @@ :database database :beginnings (coerce beginnings 'vector)))) - (defun generate-sentence (markov) (iterate (repeat 50) @@ -65,10 +64,123 @@ -(defparameter *m* (build-markov-generator *text* 2)) +(defun firebase-get (url) + (-> url + drakma:http-request + (flex:octets-to-string :external-format :utf-8) + yason:parse)) + +(defun hn-top () + (firebase-get "https://hacker-news.firebaseio.com/v0/topstories.json")) + +(defun hn-item (id) + (firebase-get + (format nil "https://hacker-news.firebaseio.com/v0/item/~d.json" id))) + +(defun hn-story (story-id) + (hn-item story-id)) + +(defun hn-comment (story-id) + (hn-item story-id)) + +(defun hn-text (comment) + (-> (gethash "text" comment) + sanitize:clean + html-entities:decode-entities)) + +(defparameter *errors* 0) + +(defun hn-comments (story-id) + (iterate + (with story = (hn-story story-id)) + (with children = (gethash "kids" story)) + (repeat 50) + ; (sleep 0.1) + (while children) + (for child-id = (pop children)) + (for child = (handler-case (hn-comment child-id) + (drakma::drakma-simple-error () (incf *errors*) nil))) + (when child + (collect child) + (setf children (append children (gethash "kids" child)))))) + +(defvar *hn* nil) + +(defun build-hn-corpus () + (length (setf *hn* (-<> (hn-top) + (take 15 <>) + (mapcan #'hn-comments <>) + (mapcar #'hn-text <>) + (format nil "~{~a~%~}" <>))))) -(iterate (repeat 10) - (terpri) - (terpri) - (princ (generate-sentence *m*))) +(defun ratebeer-get (page) + (-<> (format nil "http://www.ratebeer.com/beer-ratings/0/~d/" page) + drakma:http-request + plump:parse)) + +(defun ratebeer-clean (raw) + (-<> raw + (plump:get-elements-by-tag-name <> "table") + car + (plump:get-elements-by-tag-name <> "td") + (mapcar (rcurry #'plump:get-elements-by-tag-name "span") <>) + (remove-if-not #'identity <>) + (mapcar #'first <>) + (mapcar #'plump:text <>))) + +(defvar *beer* nil) + +(defun build-beer-corpus () + (length + (setf *beer* + (iterate + (for page :from 1 :to 30) + (appending (ratebeer-clean (ratebeer-get page)) :into reviews) + (finally (return (format nil "~{~A~%~}" reviews))))))) + + + +(defun wine-get-list (page-number) + (-<> (format nil "http://www.winemag.com/?s=&drink_type=wine&page=~D" + page-number) + drakma:http-request + plump:parse)) + +(defun wine-get-review (url) + (-<> url + drakma:http-request + plump:parse)) + + +(defun wine-clean-list (list-page) + (-<> list-page + (clss:select "a.review-listing" <>) + (map 'list (rcurry #'plump:attribute "href") <>))) + +(defun wine-clean-review (review-page) + (plump:text (elt (clss:select "#review .description" review-page) 0))) + + +(defparameter *wine* nil) +(defun build-wine-corpus () + (length + (setf *wine* + (iterate + (for page :from 1 :to 10) + (for review-links = (wine-clean-list (wine-get-list page))) + (appending (mapcar (compose #'wine-clean-review #'wine-get-review) + review-links) + :into reviews) + (finally (return (format nil "~{~A~%~}" reviews))))))) + +(defparameter *m* + (build-markov-generator (concatenate 'string *hn* *wine*) 2)) + + +(iterate (repeat 50) + (for sentence = (generate-sentence *m*)) + (when (<= (length sentence) 140) + (terpri) + (terpri) + (princ sentence)))