URI:
       tcomfix.awk - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tcomfix.awk (1202B)
       ---
            1 # when raw index has a lot of entries like
            2 # 1578324        problematico, a, ci, che
            3 # apply this algorithm:
            4 #  treat things after comma as suffixes
            5 #  for each suffix:
            6 #      if single letter, replace last letter
            7 #      else search backwards for beginning of suffix
            8 #      and if it leads to an old suffix of approximately
            9 #      the same length, replace that suffix
           10 # This will still leave some commas to fix by hand
           11 # Usage: awk -F'\t' -f comfix.awk rawindex > newrawindex   (field separator is a tab)
           12 
           13 # Records with exactly two fields: split field 2 at commas, one output line per form.
           14 NF == 2        {
           15                 if(index($2, ",") != 0 && length($2) != 0) {
           16                         nparts = split($2, part, /,[ ]*/)
           17                         base = part[1]
           18                         # the part before the first comma is printed as is
           19                         printf "%s\t%s\n", $1, base
           20                         for(j = 2; j <= nparts; j++) {
           21                                 cut = matchsuflen(base, part[j])
           22                                 if(cut == 0) {
           23                                         # no replacement point: keep the comma form
           24                                         printf "%s\t%s\n", $1, base ", " part[j]
           25                                         continue
           26                                 }
           27                                 # drop the last cut characters, then append the suffix
           28                                 printf "%s\t%s\n", $1, substr(base, 1, length(base)-cut) part[j]
           29                         }
           30                 } else
           31                         print
           32         }
           33 # Records with any other field count pass through unchanged.
           34 NF != 2 { print }
           35 
           36 # matchsuflen(w, suf): how many trailing characters of w the suffix suf
           37 # should replace, or 0 when no acceptable replacement point exists.
           38 #  - a one-letter suffix always replaces exactly the last letter
           39 #  - otherwise scan w from its end for the first letter of suf and accept
           40 #    that distance only if it differs from length(suf) by at most 3
           41 function matchsuflen(w, suf,                nw,nsuf,first,back,diff)
           42 {
           43         nsuf = length(suf)
           44         if(nsuf == 1)
           45                 return 1
           46         nw = length(w)
           47         first = substr(suf, 1, 1)
           48         # back counts positions from the end of w (1 = last character)
           49         back = 1
           50         while(back <= nw && substr(w, nw-back+1, 1) != first)
           51                 back++
           52         if(back > nw)
           53                 return 0
           54         diff = (back > nsuf) ? back-nsuf : nsuf-back
           55         return (diff > 3) ? 0 : back
           56 }