View Javadoc
1   package org.sentrysoftware.jawk.util;
2   
3   /*-
4    * ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲
5    * Jawk
6    * ჻჻჻჻჻჻
7    * Copyright (C) 2006 - 2023 Sentry Software
8    * ჻჻჻჻჻჻
9    * This program is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation, either version 3 of the
12   * License, or (at your option) any later version.
13   *
14   * This program is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU General Lesser Public
20   * License along with this program.  If not, see
21   * <http://www.gnu.org/licenses/lgpl-3.0.html>.
22   * ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱
23   */
24  
25  import java.io.File;
26  import java.io.IOException;
27  import java.io.PrintStream;
28  import java.io.StringReader;
29  import java.net.URISyntaxException;
30  import java.util.Locale;
31  
32  import org.slf4j.Logger;
33  
34  /**
35   * Manages the command-line parameters accepted by Jawk.
36   * The parameters and their meanings are provided below:
37   *
38   * <ul>
39   * <li>-v name=val [-v name=val] ... <br/>
40   *   Variable assignments prior to the execution of the script.
41   * <li>-F regexp <br/>
42   *   Field separator (FS).
43   * <li>-f filename <br/>
44   *   Use the text contained in filename as the script rather than
45   *   obtaining it from the command-line.
46   * <li><i>Extension</i> -c <br/>
47   *   Write intermediate file. Intermediate file can be used as
48   *   an argument to -f.
49   * <li><i>Extension</i> -o filename <br/>
50   *   Output filename for intermediate file, tuples, or syntax tree.
51   * <li><i>Extension</i> -s <br/>
52   *   Dump the intermediate code.
53   * <li><i>Extension</i> -S <br/>
54   *   Dump the syntax tree.
55   * <li><i>Extension</i> -x <br/>
56   *   Enables _sleep, _dump, and exec keywords/functions.
57   * <li><i>Extension</i> -y <br/>
58   *   Enables _INTEGER, _DOUBLE, and _STRING type casting keywords.
59   * <li><i>Extension</i> -t <br/>
60   *   Maintain array keys in sorted order (using a TreeMap instead of a HashMap)
61   * <li><i>Extension</i> -r <br/>
62   *   Do NOT error for <code>IllegalFormatException</code> when using
63   *   <code>java.util.Formatter</code> for <code>sprintf</code>
64   *   and <code>printf</code>.
65   * <li><i>Extension</i> -ext <br/>
66   *   Enabled user-defined extensions. Works together with the
67   *   -Djava.extensions property.
68   *   It also disables blank rule as mapping to a print $0 statement.
69   * <li><i>Extension</i> -ni <br/>
70   *   Do NOT consume stdin or files from ARGC/V through input rules.
71   *   The motivation is to leave input rules for blocking extensions
72   *   (i.e., Sockets, Dialogs, etc).
73   * </ul>
74   * followed by the script (if -f is not provided), then followed
75   * by a list containing zero or more of the following parameters:
76   * <ul>
77   * <li>name=val <br/>
78   *   Variable assignments occurring just prior to receiving input
79   *   (but after the BEGIN blocks, if any).
80   * <li>filename <br/>
81   *   Filenames to treat as input to the script.
82   * </ul>
83   * <p>
84   * If no filenames are provided, stdin is used as input
85   * to the script (but only if there are input rules).
86   *
87   * @author Danny Daglas
88   */
89  public class AwkParameters {
90  
91  	private static final Logger LOG = AwkLogger.getLogger(AwkParameters.class);
92  
93  	private static final String JAR_NAME;
94  	static {
95  		String myName;
96  		try {
97  			File me = new File(AwkParameters.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath());
98  			myName = me.getName();
99  		}
100 		catch (URISyntaxException e) {
101 			myName = "Jawk.jar";
102 		}
103 		JAR_NAME = myName;
104 	}
105 
106 	/**
107 	 * Parses AWK command line parameters,
108 	 * for example from the VM entry point <code>main()</code>.
109 	 * <p>
110 	 * The command-line argument semantics are as follows:
111 	 * <ul>
112 	 * <li>First, "-" arguments are processed until first non-"-" argument
113 	 *   is encountered, or the "-" itself is provided.
114 	 * <li>Next, a script is expected (unless the -f argument was provided).
115 	 * <li>Then, subsequent parameters are passed into the script
116 	 *   via the ARGC/ARGV variables.
117 	 * </ul>
118 	 *
119 	 * @param args The command-line arguments provided by the user.
120 	 * @return a new instance of AwkSettings that reflects the configuration
121 	 *   set in the command line arguments
122 	 */
123 	public static AwkSettings parseCommandLineArguments(String[] args) {
124 
125 		AwkSettings settings = new AwkSettings();
126 
127 		int argIdx = 0;
128 		try {
129 			// optional parameter mode (i.e. args[i].charAt(0) == '-')
130 			while (argIdx < args.length) {
131 				assert args[argIdx] != null;
132 				if (args[argIdx].length() == 0) {
133 					throw new IllegalArgumentException("zero-length argument at position " + (argIdx + 1));
134 				}
135 				if (args[argIdx].charAt(0) != '-') {
136 					// no more -X arguments
137 					break;
138 				} else if (args[argIdx].equals("-")) {
139 					// no more -X arguments
140 					++argIdx;
141 					break;
142 				} else if (args[argIdx].equals("-v")) {
143 					checkParameterHasArgument(args, argIdx);
144 					++argIdx;
145 					checkInitialVariableFormat(args[argIdx]);
146 					addVariable(settings, args[argIdx]);
147 				} else if (args[argIdx].equals("-f")) {
148 					checkParameterHasArgument(args, argIdx);
149 					++argIdx;
150 					settings.addScriptSource(new ScriptFileSource(args[argIdx]));
151 				} else if (args[argIdx].equals("-c")) {
152 					settings.setWriteIntermediateFile(true);
153 				} else if (args[argIdx].equals("-o")) {
154 					checkParameterHasArgument(args, argIdx);
155 					++argIdx;
156 					settings.setOutputFilename(args[argIdx]);
157 				} else if (args[argIdx].equals("-S")) {
158 					settings.setDumpSyntaxTree(true);
159 				} else if (args[argIdx].equals("-s")) {
160 					settings.setDumpIntermediateCode(true);
161 				} else if (args[argIdx].equals("-x")) {
162 					settings.setAdditionalFunctions(true);
163 				} else if (args[argIdx].equals("-y")) {
164 					settings.setAdditionalTypeFunctions(true);
165 				} else if (args[argIdx].equals("-t")) {
166 					settings.setUseSortedArrayKeys(true);
167 				} else if (args[argIdx].equals("-r")) {
168 					settings.setCatchIllegalFormatExceptions(false);
169 				} else if (args[argIdx].equals("-F")) {
170 					checkParameterHasArgument(args, argIdx);
171 					++argIdx;
172 					settings.setFieldSeparator(args[argIdx]);
173 				} else if (args[argIdx].equals("--locale")) {
174 					checkParameterHasArgument(args, argIdx);
175 					++argIdx;
176 					settings.setLocale(new Locale(args[argIdx]));
177 				} else if (args[argIdx].equals("-ext")) {
178 					settings.setUserExtensions(true);
179 				} else if (args[argIdx].equals("-h") || args[argIdx].equals("-?")) {
180 					if (args.length > 1) {
181 						throw new IllegalArgumentException("When printing help/usage output, we do not accept other arguments.");
182 					}
183 					usage(System.out);
184 					System.exit(0);
185 				} else {
186 					throw new IllegalArgumentException("Unknown parameter: " + args[argIdx]);
187 				}
188 
189 				++argIdx;
190 			}
191 
192 			// script mode (if -f is not provided)
193 			if (settings.getScriptSources().isEmpty()) {
194 				if (argIdx >= args.length) {
195 					throw new IllegalArgumentException("Awk script not provided.");
196 				}
197 				String scriptContent = args[argIdx++];
198 				settings.addScriptSource(new ScriptSource(
199 						ScriptSource.DESCRIPTION_COMMAND_LINE_SCRIPT,
200 						new StringReader(scriptContent),
201 						false));
202 			} else {
203 				// XXX Maybe we should delay that to a later stage? The only difference would be, that errors (for example: File not found, or unable to read) would occure later
204 				// initialize the Readers or InputStreams
205 				for (ScriptSource scriptSource : settings.getScriptSources()) {
206 					try {
207 						if (scriptSource.isIntermediate()) {
208 							scriptSource.getInputStream();
209 						} else {
210 							scriptSource.getReader();
211 						}
212 					} catch (IOException ex) {
213 						LOG.error("Failed to read script '" + scriptSource.getDescription() + "'", ex);
214 						System.exit(1);
215 					}
216 				}
217 			}
218 		} catch (IllegalArgumentException iae) {
219 			LOG.error("Failed to parse arguments. Please see the help/usage output (cmd line switch '-h').", iae);
220 			System.exit(1);
221 		}
222 
223 		// name=val or filename mode
224 		while (argIdx < args.length) {
225 			String nameValueOrFileName = args[argIdx++];
226 			settings.getNameValueOrFileNames().add(nameValueOrFileName);
227 		}
228 
229 		return settings;
230 	}
231 
232 	/**
233 	 * Dump usage to stderr; exit with a non-zero error code.
234 	 */
235 	private static void usage(PrintStream dest) {
236 		//String cls = Awk.class.getName();
237 		dest.println("Usage:");
238 		dest.println(
239 				"java -jar " + JAR_NAME + " [-F fs_val]"
240 				+ " [-f script-filename]"
241 				+ " [-o output-filename]"
242 				+ " [-c]"
243 				+ " [-S]"
244 				+ " [-s]"
245 				+ " [-x]"
246 				+ " [-y]"
247 				+ " [-r]"
248 				+ " [--locale locale]"
249 				+ " [-ext]"
250 				+ " [-t]"
251 				+ " [-v name=val]..."
252 				+ " [script]"
253 				+ " [name=val | input_filename]...");
254 		dest.println();
255 		dest.println(" -F fs_val = Use fs_val for FS.");
256 		dest.println(" -f filename = Use contents of filename for script.");
257 		dest.println(" -v name=val = Initial awk variable assignments.");
258 		dest.println();
259 		dest.println(" -t = (extension) Maintain array keys in sorted order.");
260 		dest.println(" -c = (extension) Compile to intermediate file. (default: a.ai)");
261 		dest.println(" -o = (extension) Specify output file.");
262 		dest.println(" -S = (extension) Write the syntax tree to file. (default: syntax_tree.lst)");
263 		dest.println(" -s = (extension) Write the intermediate code to file. (default: avm.lst)");
264 		dest.println(" -x = (extension) Enable _sleep, _dump as keywords, and exec as a builtin func.");
265 		dest.println(" -y = (extension) Enable _INTEGER, _DOUBLE, and _STRING casting keywords.");
266 		dest.println(" -r = (extension) Do NOT hide IllegalFormatExceptions for [s]printf.");
267 		dest.println(" --locale Locale = (extension) Specify a locale to be used instead of US-English");
268 		dest.println("-ext= (extension) Enable user-defined extensions. (default: not enabled)");
269 		dest.println();
270 		dest.println(" -h or -? = (extension) This help screen.");
271 	}
272 
273 	/**
274 	 * Validates that a required argument is provided with the parameter.
275 	 * This could have been done with a simple
276 	 * <code>if (argIdx+1 &gt;= args.length) ...</code>.
277 	 * However,
278 	 * <ul>
279 	 * <li>this normalizes the implementation throughout the class.
280 	 * <li>additional assertions are performed.
281 	 * </ul>
282 	 */
283 	private static void checkParameterHasArgument(String[] args, int argIdx) {
284 		assert argIdx < args.length;
285 		assert args[argIdx].charAt(0) == '-';
286 		if (argIdx + 1 >= args.length) {
287 			throw new IllegalArgumentException("Need additional argument for " + args[argIdx]);
288 		}
289 	}
290 
291 	/**
292 	 * Makes sure the argument is of the form name=value.
293 	 */
294 	private static void checkInitialVariableFormat(String keyValue) {
295 		int equalsCount = 0;
296 		int length = keyValue.length();
297 		for (int i = 0; equalsCount <= 1 && i < length; i++) {
298 			if (keyValue.charAt(i) == '=') {
299 				++equalsCount;
300 			}
301 		}
302 		if (equalsCount != 1) {
303 			throw new IllegalArgumentException("keyValue \"" + keyValue + "\" must be of the form \"name=value\"");
304 		}
305 	}
306 
307 	private static void addVariable(AwkSettings settings, String keyValue) {
308 		int equalsIdx = keyValue.indexOf('=');
309 		assert equalsIdx >= 0;
310 		String name = keyValue.substring(0, equalsIdx);
311 		String valueString = keyValue.substring(equalsIdx + 1);
312 		Object value;
313 		// deduce type
314 		try {
315 			value = Integer.parseInt(valueString);
316 		} catch (NumberFormatException nfe) {
317 			try {
318 				value = Double.parseDouble(valueString);
319 			} catch (NumberFormatException nfe2) {
320 				value = valueString;
321 			}
322 		}
323 		// note: this can overwrite previously defined variables
324 		settings.getVariables().put(name, value);
325 	}
326 }