Improved swish indexing - tomb - the crypto undertaker
HTML git clone git://parazyd.org/tomb.git
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit 064b9c964c369ffdcd52eb1b27f7db04428b9882
DIR parent f3c18819d24976213554625be822ac1ccab2ae05
HTML Author: Jaromil <jaromil@dyne.org>
Date: Thu, 20 Jun 2013 10:26:12 +0200
Improved swish indexing
swish-e usage for indexing of file contents is stable
Diffstat:
M tomb | 65 ++++++++++++++++++++++++++-----
1 file changed, 55 insertions(+), 10 deletions(-)
---
DIR diff --git a/tomb b/tomb
@@ -1754,31 +1754,76 @@ index_tombs() {
swishrc=`safe_filename swish`
cat <<EOF > $swishrc
+# index directives
DefaultContents TXT*
+IndexDir $tombmount
+IndexFile $tombmount/.swish
+# exclude images
+FileRules filename regex /\.jp.?g/i
+FileRules filename regex /\.png/i
+FileRules filename regex /\.gif/i
+FileRules filename regex /\.tiff/i
+FileRules filename regex /\.svg/i
+FileRules filename regex /\.xcf/i
+FileRules filename regex /\.eps/i
+FileRules filename regex /\.ttf/i
+# exclude audio
+FileRules filename regex /\.mp3/i
+FileRules filename regex /\.ogg/i
+FileRules filename regex /\.wav/i
+FileRules filename regex /\.mod/i
+FileRules filename regex /\.xm/i
+# exclude video
+FileRules filename regex /\.mp4/i
+FileRules filename regex /\.avi/i
+FileRules filename regex /\.ogv/i
+FileRules filename regex /\.ogm/i
+FileRules filename regex /\.mkv/i
+FileRules filename regex /\.mov/i
+# exclude system
+FileRules filename is ok
+FileRules filename is lock
+FileRules filename is control
+FileRules filename is status
+FileRules filename is proc
+FileRules filename is sys
+FileRules filename is supervise
+FileRules filename regex /\.asc$/i
+FileRules filename regex /\.gpg$/i
+
+# pdf and postscript
FileFilter .pdf pdftotext "'%p' -"
-
-FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/
-FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/
+FileFilter .ps ps2txt "'%p' -"
+# compressed files
+FileFilterMatch lesspipe "%p" /\.tgz$/i
+FileFilterMatch lesspipe "%p" /\.zip$/i
+FileFilterMatch lesspipe "%p" /\.gz$/i
+FileFilterMatch lesspipe "%p" /\.bz2$/i
+FileFilterMatch lesspipe "%p" /\.Z$/
+# spreadsheets
+FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/i
+FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/i
FileFilter .ods unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .ots unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dbf unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dif unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .uos unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .sxc unoconv "-d spreadsheet -f csv --stdout %P"
-
-FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/
-FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/
-FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/
-
+# word documents
+FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/i
+FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/i
+FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/i
+FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.tex$/i
+# native html support
IndexContents HTML* .htm .html .shtml
IndexContents XML* .xml
-IndexDir $tombmount
-IndexFile $tombmount/.swish
EOF
+
xxx "Using swish-e to create index"
swish-e -c $swishrc -S fs -v3
rm -f $swishrc
}
+
say "search index updated"
done
}