#! /bin/sh
##########################################################################
# Title      :  urlgetopt - decode "urlencoded" forms data (RFC 1866)
# Author     :  Heiner Steven
# Date       :  1999-06-27
# Category   :  CGI, File Conversion
# Requires   :  -
# SCCS-Id.   :  @(#) urlgetopt 1.6 05/04/20
##########################################################################
# usage
#       urlgetopt [-lF] [-p prefix] [urlencoded-data]
#       -l:  print fields on a separate line
#       -p:  prefix for generated environment variable names
#       -F:  force output of invalid assignments
#
# Description
#       Data entered within a HTML-FORM is transferred to the HTTP-Server
#       using the "application/x-www-form-urlencoded" encoding type.
#       This script decodes "form-urlencoded" data in a form
#       suitable to be used within shell scripts
#
#       Example:
#       The form-urlencoded string
#               name=Heiner+Steven&org=Heiner%27s+SHELLdorado
#
#       will be decoded to
#               name='Heiner Steven'
#               org='Heiner'"'"'s SHELLdorado'
#
#       usage example (e.g. from within a CGI script):
#
#               eval `urlgetopt -p FORM_ -l "$QUERY_STRING"`
#
#       Without "eval":
#
#               OIFS=$IFS
#               IFS="\n"
#               set -- `urlgetopt -l "$QUERY_STRING"`
#               IFS=$OIFS
#               echo $# arguments:
#               for arg
#               do
#                   echo "<$arg>"
#               done
#
#       NOTE: inside double quotes sh keeps "\n" as the two characters
#       backslash and "n", so the IFS assignment above does not select a
#       newline.  Assign a literal newline (a quoted line break) instead.
#
# Notes
#       o  Assumes form-data in the format "name=value", but should
#          handle other values as well
#       o  An apostrophe ' within apostrophes cannot be written as '\'' (in sh,
#          bash, ksh88), so we use the work around ' "'" '. Using ksh93,
#          we could just use $'\''.
#       o  If the last character of a value in an assignment is an
#          apostrophe ('), the printed assignment will contain two
#          superfluous apostrophes ('').
#
# Portability
#       Solaris 9 "nawk"
#       Linux 2.4 "awk" (= GNU awk) 3.1.0
#
# Changes:
# 2005-04-20 claudio    allow values with leading whitespace (thanks to
#                       Claudio Jolowicz) (1.6)
# 2002-03-26 heiner     Handle a "-n" argument correctly (1.4)
#                       Thanks to Brian Hiles for
#                       pointing this out.
##########################################################################

# These two assignments must stand on lines of their own: anything that
# shares a physical line with the "#!" line or another "#" comment is
# comment text to the shell and is never executed.
PN=`basename "$0"`                      # Program name
VER="1.6"

# Set the following variable to shorten startup time.
# Otherwise the directories from the PATH variable will be searched for
# a suitable AWK implementation.
#: ${NAWK=nawk}                 # Set this variable to speed up startup

# usage - print the help text to standard error and exit with status 1.
# Reads the globals PN (program name) and VER (version).
usage () {
    cat >&2 <<EOF
$PN - decode "urlencoded" CGI form data, $VER
usage: $PN [-lF] [-p prefix] [urlencoded-string]
    -l:  print fields on a separate line
    -p:  prefix for generated environment variable names
    -F:  force output of invalid assignments

Example:
    eval \`urlgetopt -l "\$QUERY_STRING"\`
EOF
    exit 1
}

# find_gsub_awk - search PATH for an executable named "*awk" that has a
# working gsub() function.
# Globals:   PATH (read); NAWK (set to the first candidate that passes);
#            path, awk, result, SearchDirs, SavedIFS (scratch)
# Returns:   0 if a candidate was found, 1 otherwise (NAWK is left untouched)
find_gsub_awk () {
    # Split PATH on ":" only, so directories containing blanks stay intact.
    # The appended ":" keeps a trailing empty component (= current
    # directory), which field splitting would otherwise drop.
    SearchDirs=$PATH:
    SavedIFS=$IFS
    IFS=:
    for path in $SearchDirs
    do
        IFS=$SavedIFS           # the word list is already expanded
        [ -n "$path" ] || path=.        # empty component: current directory
        for awk in "$path"/*awk
        do
            [ -f "$awk" ] && [ -x "$awk" ] || continue
            # Ignore "script.awk".  Only the file name is examined: a dot
            # in the directory part (".", "/opt/gawk-3.1/bin") must not
            # disqualify the candidate.
            case `basename "$awk"` in
                *.*)    continue;;
            esac

            # Now check for the "gsub()" function
            result=`echo "UU" | "$awk" '{ gsub (/U/, "X"); print }' 2>/dev/null`
            [ X"$result" = X"XX" ] || continue
            NAWK=$awk           # Found!
            IFS=$SavedIFS
            return 0
        done
    done
    IFS=$SavedIFS
    return 1
}

# We use "getopts" instead of "getopt" to preserve whitespace within
# arguments.

VarPrefix=
LongFormat=no
EvalCheck=true
while getopts :hFlp: opt
do
    case "$opt" in
        F)      EvalCheck=false;;
        h)      usage;;
        l)      LongFormat=yes;;
        p)      VarPrefix=$OPTARG;;
        # The leading ":" of the option string silences getopts itself,
        # so say what was wrong before printing the usage text.
        :)      echo >&2 "$PN: option -$OPTARG requires an argument"
                usage;;
        \?)     echo >&2 "$PN: unknown option -$OPTARG"
                usage;;
        *)      usage;;
    esac
done
shift `expr "$OPTIND" - 1`

# We need a new AWK program supporting the "gsub()" function.
# Most AWKs (i.e. "gawk") do support it, but some older NAWKs do not.
# Search for a "gsub" capable NAWK using the current PATH.

if [ X"$NAWK" = X ]
then
    find_gsub_awk
    : ${NAWK:=awk}              # nothing suitable found: fall back to "awk"
fi

# NOTE(review): the decoding pipeline starts in the text below.  The rest of
# its awk program is on the following line of this file, where part of the
# text is missing and the end is cut off, so the statement cannot be
# restored without guessing.  Its visible head is therefore kept verbatim
# and inert (it was already behind a "#" on the same physical line);
# re-enable it together with the complete awk program.
# if [ $# -gt 0 ] then # Process arguments, if specified... # Special handling: BSD derived "echo" implementations # will not echo "-n", but omit the trailing newline. "echo --" # doesn't work as expected, too, so we add a non-dash character # as the first character to the "echo", and remove it later on. echo X"$@" | sed "s/^X//" else # ...otherwise read standard input cat fi | $NAWK -F'[&]' ' BEGIN { FieldSep = ("'"$LongFormat"'" == "yes") ? 
"\n" : " " VarPrefix = "'"$VarPrefix"'" evalcheck = ("'"$EvalCheck"'" == "true") Hex ["0"] = 0; Hex ["1"] = 1; Hex ["2"] = 2; Hex ["3"] = 3; Hex ["4"] = 4; Hex ["5"] = 5; Hex ["6"] = 6; Hex ["7"] = 7; Hex ["8"] = 8; Hex ["9"] = 9; Hex ["A"] = 10; Hex ["B"] = 11; Hex ["C"] = 12; Hex ["D"] = 13; Hex ["E"] = 14; Hex ["F"] = 15; squote = sprintf ("%c", 39) exitcode = 0 } { gsub (/\+/, " "); # Parse assignment separated by "&", e.g. "a=b&c=d&..." for ( field=1; field<=NF; ++field ) { # Substitute %HH with the "real" character if ( $field ~ /%[0-9A-F][0-9A-F]/ ) { newfield = "" for ( i=1; i<=length ($field); i++ ) { if ( substr ($field, i, 1) == "%" ) { dec = Hex [substr ($field, i+1, 1)] * 16 + \ Hex [substr ($field, i+2, 1)] newfield = sprintf ("%s%c", newfield, dec) i += 2; } else { newfield = newfield substr ($field, i, 1); } } $field = newfield } #print "after % processing " $0 # Decode "var=value" pairs. # # If "evalcheck" is true, we ensure that the result is a # valid variable assignment: # 1. it has the form varname=value # 2. the variable name must be a valid shell # identifier: start with letter or underscore, followed # only be letters, underscores or digits if (evalcheck && !match ($field, /^[a-zA-Z_][a-zA-Z_0-9]*=/)) { print "invalid assignment: " $field | "cat >&2" exit (exitcode=1); } # Now parse assignments of the form "a=b". if ( $field ~ /\=/ ) { newfield = "" equalseen = 0 fieldlength = length ($field) for ( i=1; i<=fieldlength; i++ ) { s = substr ($field, i, 1) if ( s == "=" ) { if ( !equalseen ) s = s squote equalseen = 1 } else if ( equalseen ) { # value if ( s == squote ) { # Special handling: since an apostrophe # (in this example represented by a dot # .) cannot be quoted using a backslash, # we use the following work around: we # replace it with "." if ( i&2" exit (exitcode=1) } #print "after = processing " $0 } for ( i=1; i<=NF; ++i ) { printf ("%s%s", VarPrefix, $i) if ( i