Bottleneck rule



I wrote a function which loops through a long list of strings (about a
10000) modifies some according to some specified rules and finally it
groups similar strings together. When this is run (even loaded from a
compiled .fas file), it takes about 2 hours to complete (it's true at
some level there is an iteration through an array of 10 million rows
(10000*10000). I was sreading Paul Graham's book and in it he was
talking about the bottleneck rule and optimizing speed and says: If a
major bottleneck occurred in the inner loop of some function, we might
add
a declaration like the following:
(defun bottleneck (...)
(do (...)
(...)
(do (...)
(...)
(declare (optimize (speed 3) (safety 0)))
...) ) )

But I have no idea how to use that in my code? Anyone has any hint?
thanks. Below I attach the function just for info.
----------------------------------------------------------------------------------------------------------------------------------------
(defun GroupModifyStrings (intres)
"Function to group strings by removing the last chars
and using the edit distance (modified) to correct typing errors.
(GroupModifystrings 'false) prints out only the output,
(GroupModifystrings 'true)
prints out an intermediate store file to view intermediate results.
List should be sorted by decreasing counter order so that the
modification is done on strings with lower counts."
;;Remove last chars and group.
;;The Scan function.
(defun scan (file)
(with-open-file (stream file :direction :input)
(setq m (loop for input = (read stream nil stream)
until (eq input stream) collect input))))
;;Read the files ("string" and "counter").
(setf x (scan "C:/string.txt"))
(setf counter (scan "C:/counter.txt"))
;;The while macro.
(defmacro while (condition &rest body)
(let ((var (gensym)))
`(do ((,var nil (progn ,@body)))
((null ,condition) ,var))))
;;First remove the last chars and store in an array.
(setf store (make-array (list (length x) 1)))
(dotimes (i (length x))
(if (not (alpha-char-p (char (nth i x) (1- (length (nth i x))))))
(setf (nth i x) (concatenate 'string (nth i x) "Z"))))
(setf i 0)
(while (< i (length x))
(setf (aref store i 0)
(while (AND (alpha-char-p (char (nth i x) (1- (length (nth i
x))))) (> (length (nth i x)) 1))
(setf (nth i x) (string-right-trim (list (char (nth i x) (1-
(length (nth i x))))) (nth i x)))))
(incf i))
;;Store the array in a list.
(setf ls nil)
(dotimes (i (array-dimension store 0))
(setf ls (append ls (list (aref store i 0)))))
;;Tabulate like the pivot table.
(setf temp (make-array (list (length ls)) :initial-element '0))
(dotimes (i (length ls))
(setf j (1+ i))
(if (not (equal (nth i ls) 'zzz)) (setf (aref temp i) (+ (aref
temp i)(nth i counter))))
(while (< j (length ls))
(if (AND (equal (nth i ls) (nth j ls)) (not (equal (nth j ls)
'zzz)))
(AND (setf (aref temp i) (+ (aref temp i) (nth j counter)))
(setf (nth j ls) 'zzz) (setf (nth j counter) 'zzz)))
(incf j)))
;;Remove the indicators.
(setf counter (coerce temp 'list))
(delete 'zzz ls)
(delete 0 counter)
;;Edit Distance.
(defun distance (s1 s2)
(let* ((width (1+ (length s1)))
(height (1+ (length s2)))
(d (make-array (list height width))))
(dotimes (x width)
(setf (aref d 0 x) x))
(dotimes (y height)
(setf (aref d y 0) y))
(dotimes (x (length s1))
(dotimes (y (length s2))
(setf (aref d (1+ y) (1+ x))
(min (1+ (aref d y (1+ x)))
(1+ (aref d (1+ y) x))
(+ (aref d y x)
(if (char= (aref s1 x) (aref s2 y))
0
1))))))
(aref d (1- height) (1- width))))
;;Test the Edit distance and store raw results.
(setf rows (*(1- (length ls))(length ls)))
(setf store (make-array (list rows 4)))
(setf k 0)
(dotimes (i (length ls))
(dotimes (j (length ls))
(when (not (equal (nth i ls) (nth j ls)))
(setf (aref store k 0) (nth i ls))
(setf (aref store k 1) (nth j ls) )
(setf (aref store k 2) (distance (nth i ls)(nth j ls)))
(incf k)
)))
(dotimes (i (array-dimension store 0))
(dotimes (j (length
ls))
(if (eq (aref store i 0) (nth j ls)) (setf (aref store i 3) (nth
j counter)))
))
;;Extra Rule: The last 3 digits must be equal.
(dotimes (i (array-dimension store 0))
(if (AND (not (equal (loop for n from 1 to 3 collect (digit-char-p
(char (aref store i 0) (- (length (aref store i 0)) n))))
(loop for n from 1 to 3 collect (digit-char-p
(char (aref store i 1) (- (length (aref store i 1)) n))))))
(< (aref store i 2) 2))
(setf (aref store i 2) 20)))
;;conditional printout of the intermediate array store after
applying rule.
(when (equal intres 'true)
(setf out-stream (open "storeint.txt" :direction :output))
(print store out-stream)
(close out-stream))
;;Map (redefine) similar strings to just one
(dotimes (j (array-dimension store 0))
(if (< (aref store j 2) 2)
(dotimes (i (array-dimension store 0))
(if (AND (equal (aref store i 0) (aref store j 1)) (< (aref
store i 2) 2))
(setf (aref store i 0) (aref store j 0))))))
;;The position helper function
(defun position-of-min (list ordering)
(let ((min-index (length ordering))
(min-value nil)
(position -1)
(i 0))
(dolist (x list (values position min-value))
(let ((pos (position x ordering)))
(when (and pos (<= pos min-index))
(setf position i min-index pos min-value x)))
(incf i))))
;;Grouped into a list.
(setf chunck (/ (array-dimension store 0) (length ls)))
(setf chorder (loop for i from 0 to chunck collect i))
(setf jj 0)
(setf n 0)
(setf arr (make-array (list (length ls) 2)))
(while (< jj rows)
(setf place (loop for i from jj below (+ jj chunck) collect (aref
store i 2)))
(setf (aref arr n 0) (aref store (+ jj (position-of-min place
chorder)) 0))
(setf (aref arr n 1) (aref store (+ jj (position-of-min place
chorder)) 3))
(setf n (1+ n))
(setf jj (+ jj chunck)))
;;Store the arr array in a list.
;;Reset ls and Counter first.
(setf ls nil)
(setf counter nil)
(dotimes (i (array-dimension arr 0))
(setf ls (append ls (list (aref arr i 0))))
(setf counter (append counter (list (aref arr i 1))))
)
;;Tabulate like the pivot table.
(setf temp (make-array (list (length ls)) :initial-element '0))
(dotimes (i (length ls))
(setf j (1+ i))
(if (not (equal (nth i ls) 'zzz)) (setf (aref temp i) (+ (aref
temp i)(nth i counter))))
(while (< j (length ls))
(if (AND (equal (nth i ls) (nth j ls)) (not (equal (nth j ls)
'zzz)))
(AND (setf (aref temp i) (+ (aref temp i) (nth j counter)))
(setf (nth j ls) 'zzz) (setf (nth j counter) 'zzz)))
(incf j)))
;;Remove the indicators.
(setf counter (coerce temp 'list))
(delete 'zzz ls)
(delete 0 counter)
;;Print output the lists of grouped strings and counts.
(setf out-stream (open "output.txt" :direction :output))
(dotimes (n (length ls))
(print (list (nth n ls)(nth n counter)) out-stream))
(close out-stream)
)

Example:
string.txt would contain:
"string1F"
"string1L"
"strinf1P"
"test2A"
"trest2Z"
"string2G"
"string2T"

counter.txt would contain:
42
25
12
9
6
4
3
(GROUPMODIFYSTRINGS 'true) would output:
("string1" 79)
("test2" 15)
("string2" 7)

.



Relevant Pages

  • Re: Bottleneck rule
    ... You need to introduce variables before you assign something with SETQ or SETF. ... "Function to group strings by removing the last chars ... prints out an intermediate store file to view intermediate results. ... ;;Print output the lists of grouped strings and counts. ...
    (comp.lang.lisp)
  • Re: Newb looking for data binding help
    ... store them in a custom generic list, or rather, instead of using array ... lists to store the strings, ... Two strings, one for a system name and one for equipment name ...
    (microsoft.public.dotnet.languages.vb)
  • Re: Lispy Python (was Re: Java, was Re: Be afraid of XML)
    ... >> But linear lisp has strings as the basic data structure instead ... >> of lists. ... Strings have characters ... (setf x ') ...
    (comp.lang.lisp)
  • Re: newbie Q: opposite of quote
    ... (eval (quote a)) ... a first Common Lisp lesson. ... you can use lists as short as you want. ... symbols, set, setq, setf, psetq, psetf. ...
    (comp.lang.lisp)
  • Re: Call by value, but (please check my hypothesis)
    ... objects are passed by reference in Lisp. ... > to lists ... SETF actually understands the syntax of the (GETF ...
    (comp.lang.lisp)