--- a/make-quickutils.lisp Wed Aug 17 03:29:14 2016 +0000
+++ b/make-quickutils.lisp Wed Aug 17 15:25:07 2016 +0000
@@ -10,6 +10,7 @@
:rcurry
:n-grams
:define-constant
+ :riffle
; :switch
; :while
; :ensure-boolean
--- a/package.lisp Wed Aug 17 03:29:14 2016 +0000
+++ b/package.lisp Wed Aug 17 15:25:07 2016 +0000
@@ -69,6 +69,7 @@
(defpackage #:sand.markov
(:use
#:cl
+ #:cl-arrows
#:losh
#:iterate
#:split-sequence
--- a/quickutils.lisp Wed Aug 17 03:29:14 2016 +0000
+++ b/quickutils.lisp Wed Aug 17 15:25:07 2016 +0000
@@ -2,7 +2,7 @@
;;;; See http://quickutil.org for details.
;;;; To regenerate:
-;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT) :ensure-package T :package "SAND.QUICKUTILS")
+;;;; (qtlc:save-utils-as "quickutils.lisp" :utilities '(:WITH-GENSYMS :ONCE-ONLY :COMPOSE :CURRY :RCURRY :N-GRAMS :DEFINE-CONSTANT :RIFFLE) :ensure-package T :package "SAND.QUICKUTILS")
(eval-when (:compile-toplevel :load-toplevel :execute)
(unless (find-package "SAND.QUICKUTILS")
@@ -17,7 +17,7 @@
:MAKE-GENSYM-LIST :ONCE-ONLY
:ENSURE-FUNCTION :COMPOSE :CURRY
:RCURRY :TAKE :N-GRAMS
- :DEFINE-CONSTANT))))
+ :DEFINE-CONSTANT :RIFFLE))))
(deftype string-designator ()
"A string designator type. A string designator is either a string, a symbol,
@@ -241,8 +241,16 @@
`(defconstant ,name (%reevaluate-constant ',name ,initial-value ,test)
,@(when documentation `(,documentation))))
+
+ (defun riffle (list obj)
+ "Insert the item `obj` in between each element of `list`."
+ (loop :for (x . xs) :on list
+ :collect x
+ :when xs
+ :collect obj))
+
(eval-when (:compile-toplevel :load-toplevel :execute)
(export '(with-gensyms with-unique-names once-only compose curry rcurry
- n-grams define-constant)))
+ n-grams define-constant riffle)))
;;;; END OF quickutils.lisp ;;;;
--- a/sand.asd Wed Aug 17 03:29:14 2016 +0000
+++ b/sand.asd Wed Aug 17 15:25:07 2016 +0000
@@ -14,7 +14,15 @@
#:split-sequence
#:parenscript
#:sketch
- #:losh)
+ #:losh
+ #:drakma
+ #:yason
+ #:flexi-streams
+ #:sanitize
+ #:html-entities
+ #:plump
+ #:clss
+ )
:serial t
:components
--- a/src/markov.lisp Wed Aug 17 03:29:14 2016 +0000
+++ b/src/markov.lisp Wed Aug 17 15:25:07 2016 +0000
@@ -52,7 +52,6 @@
:database database
:beginnings (coerce beginnings 'vector))))
-
(defun generate-sentence (markov)
(iterate
(repeat 50)
@@ -65,10 +64,123 @@
-(defparameter *m* (build-markov-generator *text* 2))
+(defun firebase-get (url)
+ (-> url
+ drakma:http-request
+ (flex:octets-to-string :external-format :utf-8)
+ yason:parse))
+
+(defun hn-top ()
+ (firebase-get "https://hacker-news.firebaseio.com/v0/topstories.json"))
+
+(defun hn-item (id)
+ (firebase-get
+ (format nil "https://hacker-news.firebaseio.com/v0/item/~d.json" id)))
+
+(defun hn-story (story-id)
+ (hn-item story-id))
+
+(defun hn-comment (story-id)
+ (hn-item story-id))
+
+(defun hn-text (comment)
+ (-> (gethash "text" comment)
+ sanitize:clean
+ html-entities:decode-entities))
+
+(defparameter *errors* 0)
+
+(defun hn-comments (story-id)
+ (iterate
+ (with story = (hn-story story-id))
+ (with children = (gethash "kids" story))
+ (repeat 50)
+ ; (sleep 0.1)
+ (while children)
+ (for child-id = (pop children))
+ (for child = (handler-case (hn-comment child-id)
+ (drakma::drakma-simple-error () (incf *errors*) nil)))
+ (when child
+ (collect child)
+ (setf children (append children (gethash "kids" child))))))
+
+(defvar *hn* nil)
+
+(defun build-hn-corpus ()
+ (length (setf *hn* (-<> (hn-top)
+ (take 15 <>)
+ (mapcan #'hn-comments <>)
+ (mapcar #'hn-text <>)
+ (format nil "~{~a~%~}" <>)))))
-(iterate (repeat 10)
- (terpri)
- (terpri)
- (princ (generate-sentence *m*)))
+(defun ratebeer-get (page)
+ (-<> (format nil "http://www.ratebeer.com/beer-ratings/0/~d/" page)
+ drakma:http-request
+ plump:parse))
+
+(defun ratebeer-clean (raw)
+ (-<> raw
+ (plump:get-elements-by-tag-name <> "table")
+ car
+ (plump:get-elements-by-tag-name <> "td")
+ (mapcar (rcurry #'plump:get-elements-by-tag-name "span") <>)
+ (remove-if-not #'identity <>)
+ (mapcar #'first <>)
+ (mapcar #'plump:text <>)))
+
+(defvar *beer* nil)
+
+(defun build-beer-corpus ()
+ (length
+ (setf *beer*
+ (iterate
+ (for page :from 1 :to 30)
+ (appending (ratebeer-clean (ratebeer-get page)) :into reviews)
+ (finally (return (format nil "~{~A~%~}" reviews)))))))
+
+
+
+(defun wine-get-list (page-number)
+ (-<> (format nil "http://www.winemag.com/?s=&drink_type=wine&page=~D"
+ page-number)
+ drakma:http-request
+ plump:parse))
+
+(defun wine-get-review (url)
+ (-<> url
+ drakma:http-request
+ plump:parse))
+
+
+(defun wine-clean-list (list-page)
+ (-<> list-page
+ (clss:select "a.review-listing" <>)
+ (map 'list (rcurry #'plump:attribute "href") <>)))
+
+(defun wine-clean-review (review-page)
+ (plump:text (elt (clss:select "#review .description" review-page) 0)))
+
+
+(defparameter *wine* nil)
+(defun build-wine-corpus ()
+ (length
+ (setf *wine*
+ (iterate
+ (for page :from 1 :to 10)
+ (for review-links = (wine-clean-list (wine-get-list page)))
+ (appending (mapcar (compose #'wine-clean-review #'wine-get-review)
+ review-links)
+ :into reviews)
+ (finally (return (format nil "~{~A~%~}" reviews)))))))
+
+(defparameter *m*
+ (build-markov-generator (concatenate 'string *hn* *wine*) 2))
+
+
+(iterate (repeat 50)
+ (for sentence = (generate-sentence *m*))
+ (when (<= (length sentence) 140)
+ (terpri)
+ (terpri)
+ (princ sentence)))