Never been to CodeSnippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags or keywords, and share them with the world (or keep them private if you prefer).

pcregrep - UTF-8 aware grep replacement

# we first have to download, compile & install the PCRE library, cf. http://www.pcre.org/pcre.txt
# requirement: Xcode, http://developer.apple.com/tools/xcode/index.html

cd ~/Desktop
/usr/bin/curl -L -O http://downloads.sourceforge.net/pcre/pcre-7.7.tar.gz
/usr/bin/tar -xzf pcre-7.7.tar.gz
cd pcre-7.7
./configure --help
./configure --prefix=/usr/local --enable-utf8 --enable-unicode-properties
# for Intel Macs, see http://hivelogic.com/articles/2005/12/ruby_rails_lighttpd_mysql_tiger
#./configure --prefix=/usr/local --enable-utf8 --enable-unicode-properties CFLAGS=-O1
/usr/bin/make
/usr/bin/sudo /usr/bin/make install 


# Inspect the pcregrep binary under /usr/local/bin.
ls -l /usr/local/bin/pcregrep
stat -x /usr/local/bin/pcregrep

# Show version and usage; use pcregrep itself to filter the help text
# for the UTF-8 and multiline related options (case-insensitive).
pcregrep --version
pcregrep --help
pcregrep --help | pcregrep -i 'utf-?8'
pcregrep --help | pcregrep -i multiline

# Related manual pages.
man pcregrep
man pcrepattern
man pcretest
man perlretut

# Open a manual page in less, positioned at the first match of the given pattern (less -p).
man pcregrep | less -p utf-8
man pcregrep | less -p multiline
man perlretut | less -p 'single line and multi'

# Open the HTML version of the pcregrep documentation.
open /usr/local/share/doc/pcre/html/pcregrep.html


# Abort unless Terminal.app's StringEncoding preference is "4"
# (the value this snippet treats as UTF-8).
case "$(/usr/bin/defaults read com.apple.Terminal StringEncoding)" in
   4)
      ;;
   *)
      echo 'Terminal.app does not use UTF-8 character set encoding!'
      exit 1
      ;;
esac


# Demo: match single characters in "café", whose last character is the
# two-byte UTF-8 sequence \303\251, with egrep and with pcregrep
# (without and with the -u option).
utf8str=$'caf\303\251'

# The string is passed as an argument to a fixed '%s' format instead of being
# used as the printf format itself: an unquoted variable in format position is
# subject to word splitting and globbing, and any '%' or '\' in the data would
# be interpreted by printf. No trailing newline is emitted, as before.
printf '%s' "$utf8str" | /usr/bin/egrep -o '.'
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -o '.'
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou '.'     # UTF-8 aware
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou 'f.$'

# Same matches, but only count the number of output lines (one per match).
printf '%s' "$utf8str" | /usr/bin/egrep -o '.' | wc -l
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -o '.' | wc -l
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou '.' | wc -l     # UTF-8 aware


#---------------------------------------------


# cf. also The Heirloom Project, http://heirloom.sourceforge.net
# download & install from http://homepage.mac.com/stefan.tramm/iWiki/HeirloomNotes.html

# Back up ~/.bash_login with a time stamp in the file name: the brace expansion
# yields the original path followed by the same path with ".orig-<timestamp>" appended.
/bin/cp -ip "${HOME}/.bash_login"{,".orig-$(/bin/date +%Y-%m-%d-%H.%M.%S)"}


# To use the Heirloom tools, insert the following statements into your ~/.profile or ~/.bash_login.
# The command below appends them to ~/.bash_login; the quoted 'EOF' delimiter keeps
# $HEIRLOOM and $PATH unexpanded in the appended text.

/bin/cat >> "${HOME}/.bash_login" <<-'EOF'

# Heirloom userland
# http://homepage.mac.com/stefan.tramm/iWiki/HeirloomNotes.html

HEIRLOOM=/opt/heirloom
if [[ -d $HEIRLOOM ]]; then
  export HEIRLOOM
  PATH=$PATH:$HEIRLOOM/5bin
else
  unset HEIRLOOM
fi

EOF


# Make the Heirloom rc file source ~/.bash_login (and with it its shell functions).
/bin/ls -lo /opt/heirloom/etc/5.rc
/usr/bin/sudo /bin/cp -ip /opt/heirloom/etc/5.rc{,.orig}   # backup

# ~/.bash_login should "source ~/.bashrc" and "bind -f ~/.inputrc"
# Append the line through "sudo tee -a". With "sudo echo ... >> file" only echo
# runs as root; the ">>" redirection is opened by the calling, unprivileged
# shell. Using tee avoids having to make the file temporarily world-writable.
echo 'source ~/.bash_login' | /usr/bin/sudo /usr/bin/tee -a /opt/heirloom/etc/5.rc > /dev/null

# Ensure the expected ownership and permissions, then show the result.
/usr/bin/sudo /usr/sbin/chown root:admin /opt/heirloom/etc/5.rc
/usr/bin/sudo /bin/chmod 644 /opt/heirloom/etc/5.rc
/bin/ls -lo /opt/heirloom/etc/5.rc

# Undo: delete the last line of 5.rc again (kept commented out)
#/usr/bin/sudo /usr/bin/sed -i '' '$,$d' /opt/heirloom/etc/5.rc

# View the rc file in a text editor (open -e).
/usr/bin/open -e /opt/heirloom/etc/5.rc


# Re-read ~/.bash_login in the current shell so that HEIRLOOM and PATH take effect.
source ~/.bash_login


# Browse the Heirloom installation; the brace expansions pass several paths to a single open call.
/usr/bin/open -e /opt/heirloom/README
/usr/bin/open /opt/heirloom/{,{etc/,5bin/}}      # open three directories in one go
/usr/bin/open /opt/heirloom/doc/{,doctools}
/usr/bin/open /opt/heirloom/doc/{,doctools/{,quickstart.pdf}}  
/usr/bin/open /opt/heirloom/doc/doctools/quickstart.pdf       # explore Heirloom troff

# NOTE(review): "5" is presumably the wrapper at $HEIRLOOM/5bin/5 (inspected with stat below);
# it appears to run the given command, or a shell when called without arguments,
# with the Heirloom tools in effect - confirm against the Heirloom notes.
5 man intro | less -p 'Multibyte character encodings'
5 man sh | less
5 man tsort | less
5 whodo
5
man pgrep
pgrep sh
pgrep bash
man bfs | less  # bfs - big file scanner


# Inspect Heirloom binaries with stat; the -f formats print the file name followed by
# file type and link target details, the $'...' variants adding ANSI escape sequences for emphasis.
/usr/bin/stat -x $HEIRLOOM/5bin/5
/usr/bin/stat -f '%N:  %HT%SY' $HEIRLOOM/5bin/5
/usr/bin/stat -f $'%N:  \e[1m%HT%SY\e[m' /opt/heirloom/bin/tsort
/usr/bin/stat -f $'%N:  \e[1;31m%HT\e[m%SY' /opt/heirloom/5bin/awk


# Heirloom grep documentation: online HTML page and the manual page.
/usr/bin/open http://heirloom.sourceforge.net/man/grep.1.html
5 man grep | less


5

# Demo: match "f" plus one character at end of line in "café" (last character
# is the two-byte UTF-8 sequence \303\251) with pcregrep and the Heirloom greps.
utf8str=$'caf\303\251'
# Quoted so the value is not subject to word splitting or globbing.
echo "$utf8str"

# The string is passed as an argument to a fixed '%s\n' format rather than
# being embedded in the format, so '%' or '\' in the data cannot be
# misinterpreted by printf. The output is still the string plus a newline.
printf '%s\n' "${utf8str}" | /usr/local/bin/pcregrep -u 'f.$'
printf '%s\n' "${utf8str}" | /opt/heirloom/5bin/grep 'f.$'
printf '%s\n' "${utf8str}" | /opt/heirloom/5bin/posix/grep -E -e 'f.$'

Login window from the command line with Pashua

# Use a fixed PATH of system directories and an IFS of space, tab and newline for the commands below.
export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
export IFS=$' \t\n'

# Create a per-user Applications folder if it does not exist yet.
mkdir -p ~/Applications

ls -ld ~/Applications
stat -x ~/Applications

# Download the Pashua disk image, mount it, copy the mounted volume
# (expected at /Volumes/Pashua) into ~/Applications and unmount it again.
cd ~/Applications
curl -L -O http://www.bluem.net/files/Pashua.dmg
hdiutil mount Pashua.dmg
cp -R /Volumes/Pashua ~/Applications/Pashua
hdiutil unmount /Volumes/Pashua

# Keep an untouched copy of the example script before it is edited below.
cd ~/Applications/Pashua/Examples
cp -p example.sh example.sh.orig   # backup


# In-place text editing commands (ed scripts fed through here-strings) that modify
# ~/Applications/Pashua/Examples/example.sh; each script ends with "w" to write the file.
# cf. http://bash-hackers.org/wiki/doku.php?id=howto:edit-ed

export FILE="${HOME}/Applications/Pashua/Examples/example.sh"

# replace #!/bin/sh with #!/bin/bash (substitution restricted to line 1)
/bin/ed -s "${FILE}" <<< $'1,1s|bin/sh|bin/bash|\nw'

# to set the encoding to UTF-8 we add: set -- test utf8
# The substitution replaces the line containing "Manage encoding" with the "set" line,
# an empty line and the original line (\1); the escaped newlines in the replacement split the line.
/bin/ed -s "${FILE}" <<< $',s|\(.*Manage encoding.*\)|set -- test utf8 # set $2 to "utf8"\\\n\\\n\\1|\nw'

# delete everything from the first line matching /conf="/ through the end of the file
# (the matching line itself is deleted as well)
/bin/ed -s "${FILE}" <<< $'/conf="/;$d\nw'


# Append the following login-window configuration and result handling to the example script.
# The quoted 'EOF' delimiter means nothing in the text is expanded while it is appended.

/bin/cat >> "${FILE}" <<-'EOF'

conf="

# Set transparency: 0 is transparent, 1 is opaque
*.transparency=0.95

# Set window title
*.title = Login Window

*.x = 550
*.y = 300
*.autoclosetime = 300

name.type = textfield
name.label = Please enter your name:
name.width = 280
name.x = 0
name.y = 110

password.type = password
password.label = Please enter your password:
password.width = 280
password.x = 0          
password.y = 45        

# Add a cancel button with default label
cb.type = cancelbutton

";   # end conf


pashua_run "$conf"
#pashua_run "$conf" "utf8"   # alternative to "set -- test utf8" above


if [[ ${cb} -ne 0 ]]; then echo 'Login cancelled!'; exit 1; fi

printf "%s\n" "name = ${name}"

printf "%s\n" "${name}" | ruby -n -e 'p $_.to_s'

# the following command requires #!/bin/bash
# cf. http://www.lugbz.org/pipermail/lugbz-list/2006-December/016360.html
/bin/ed -s <((printf "%s\n" "${name}")) <<< $',l'   

printf "%s\n" "password = ${password}"
printf "%s\n" "cb = ${cb}"

#printf "%s\n" "${password}" | /usr/bin/sudo -S /bin/ls | /usr/bin/head -n 5

/sbin/md5 -qs "${password}"

EOF


# run the modified example script
~/Applications/Pashua/Examples/example.sh

UTF8 encode/decode helper library

/**
 * UTF-8 helper: converts between ordinary JavaScript (UTF-16) strings and
 * "byte strings" in which every character code is one UTF-8 byte (0-255).
 */
var $utf8 = {

    /**
     * Encode a JavaScript string as a UTF-8 byte string.
     *
     * CRLF line endings are normalized to LF before encoding. A valid
     * surrogate pair is combined into one code point and emitted as a single
     * 4-byte sequence; an unpaired surrogate is emitted as a 3-byte sequence.
     *
     * @param {string} string Text to encode.
     * @returns {string} String whose character codes are the UTF-8 bytes.
     */
    encode : function (string) {
        string = string.replace(/\r\n/g, "\n");
        var utftext = "";

        for (var n = 0, k = string.length; n < k; n++) {
            var c = string.charCodeAt(n);

            // Combine a high surrogate with the low surrogate that follows it,
            // so characters outside the BMP become one 4-byte sequence.
            if (c >= 0xD800 && c <= 0xDBFF && n + 1 < k) {
                var low = string.charCodeAt(n + 1);
                if (low >= 0xDC00 && low <= 0xDFFF) {
                    c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
                    n++;
                }
            }

            if (c < 128) {
                utftext += String.fromCharCode(c);
            }
            else if (c < 2048) {
                utftext += String.fromCharCode((c >> 6) | 192);
                utftext += String.fromCharCode((c & 63) | 128);
            }
            else if (c < 65536) {
                utftext += String.fromCharCode((c >> 12) | 224);
                utftext += String.fromCharCode(((c >> 6) & 63) | 128);
                utftext += String.fromCharCode((c & 63) | 128);
            }
            else {
                utftext += String.fromCharCode((c >> 18) | 240);
                utftext += String.fromCharCode(((c >> 12) & 63) | 128);
                utftext += String.fromCharCode(((c >> 6) & 63) | 128);
                utftext += String.fromCharCode((c & 63) | 128);
            }
        }
        return utftext;
    },

    /**
     * Decode a UTF-8 byte string back into a JavaScript string.
     *
     * Handles 1- to 4-byte sequences; a 4-byte sequence yields a surrogate
     * pair. Continuation bytes are not validated, and bytes missing at the
     * end of the input are treated as zero bits.
     *
     * @param {string} utftext String whose character codes are UTF-8 bytes.
     * @returns {string} The decoded text.
     */
    decode : function (utftext) {
        var string = "";
        var i = 0;
        // All work variables are declared locally so nothing leaks into the
        // global scope (and the code also runs in strict mode).
        var c = 0, c2 = 0, c3 = 0, c4 = 0;

        while (i < utftext.length) {
            c = utftext.charCodeAt(i);
            if (c < 128) {
                string += String.fromCharCode(c);
                i++;
            }
            else if ((c > 191) && (c < 224)) {
                c2 = utftext.charCodeAt(i + 1);
                string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
                i += 2;
            }
            else if ((c > 239) && (c < 248)) {
                c2 = utftext.charCodeAt(i + 1);
                c3 = utftext.charCodeAt(i + 2);
                c4 = utftext.charCodeAt(i + 3);
                var cp = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
                if (cp > 0x10FFFF) {
                    // Beyond the Unicode range: substitute U+FFFD.
                    string += String.fromCharCode(0xFFFD);
                }
                else if (cp > 0xFFFF) {
                    // Split the supplementary code point into a surrogate pair.
                    cp -= 0x10000;
                    string += String.fromCharCode(0xD800 | (cp >> 10), 0xDC00 | (cp & 0x3FF));
                }
                else {
                    string += String.fromCharCode(cp);
                }
                i += 4;
            }
            else {
                c2 = utftext.charCodeAt(i + 1);
                c3 = utftext.charCodeAt(i + 2);
                string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
                i += 3;
            }
        }
        return string;
    }
}