001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
019import java.text.ParseException;
020import java.util.List;
021import java.util.ListIterator;
022
023import org.apache.commons.net.ftp.FTPClientConfig;
024import org.apache.commons.net.ftp.FTPFile;
025
026/**
027 * Implementation FTPFileEntryParser and FTPFileListParser for standard
028 * Unix Systems.
029 *
030 * This class is based on the logic of Daniel Savarese's
031 * DefaultFTPListParser, but adapted to use regular expressions and to fit the
032 * new FTPFileEntryParser interface.
033 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
034 */
035public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl
036{
037
038    static final String DEFAULT_DATE_FORMAT
039        = "MMM d yyyy"; //Nov 9 2001
040
041    static final String DEFAULT_RECENT_DATE_FORMAT
042        = "MMM d HH:mm"; //Nov 9 20:06
043
044    static final String NUMERIC_DATE_FORMAT
045        = "yyyy-MM-dd HH:mm"; //2001-11-09 20:06
046
047    // Suffixes used in Japanese listings after the numeric values
048    private static final String JA_MONTH = "\u6708";
049    private static final String JA_DAY   = "\u65e5";
050    private static final String JA_YEAR  = "\u5e74";
051
052    private static final String DEFAULT_DATE_FORMAT_JA
053        = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; //6月 3日 2003年
054
055    private static final String DEFAULT_RECENT_DATE_FORMAT_JA
056        = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; //8月 17日 20:10
057
058    /**
059     * Some Linux distributions are now shipping an FTP server which formats
060     * file listing dates in an all-numeric format:
061     * <code>"yyyy-MM-dd HH:mm</code>.
062     * This is a very welcome development,  and hopefully it will soon become
063     * the standard.  However, since it is so new, for now, and possibly
064     * forever, we merely accomodate it, but do not make it the default.
065     * <p>
066     * For now end users may specify this format only via
067     * <code>UnixFTPEntryParser(FTPClientConfig)</code>.
068     * Steve Cohen - 2005-04-17
069     */
070    public static final FTPClientConfig NUMERIC_DATE_CONFIG =
071        new FTPClientConfig(
072                FTPClientConfig.SYST_UNIX,
073                NUMERIC_DATE_FORMAT,
074                null);
075
076    /**
077     * this is the regular expression used by this parser.
078     *
079     * Permissions:
080     *    r   the file is readable
081     *    w   the file is writable
082     *    x   the file is executable
083     *    -   the indicated permission is not granted
084     *    L   mandatory locking occurs during access (the set-group-ID bit is
085     *        on and the group execution bit is off)
086     *    s   the set-user-ID or set-group-ID bit is on, and the corresponding
087     *        user or group execution bit is also on
088     *    S   undefined bit-state (the set-user-ID bit is on and the user
089     *        execution bit is off)
090     *    t   the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and
091     *        execution is on
092     *    T   the 1000 bit is turned on, and execution is off (undefined bit-
093     *        state)
094     *    e   z/OS external link bit
095     *    Final letter may be appended:
096     *    +   file has extended security attributes (e.g. ACL)
097     *    Note: local listings on MacOSX also use '@';
098     *    this is not allowed for here as does not appear to be shown by FTP servers
099     *    {@code @}   file has extended attributes
100     */
101    private static final String REGEX =
102        "([bcdelfmpSs-])" // file type
103        +"(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
104
105        + "\\s*"                                        // separator TODO why allow it to be omitted??
106
107        + "(\\d+)"                                      // link count
108
109        + "\\s+" // separator
110
111        + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?"                // owner name (optional spaces)
112        + "(?:(\\S+(?:\\s\\S+)*)\\s+)?"                 // group name (optional spaces)
113        + "(\\d+(?:,\\s*\\d+)?)"                        // size or n,m
114
115        + "\\s+" // separator
116
117        /*
118         * numeric or standard format date:
119         *   yyyy-mm-dd (expecting hh:mm to follow)
120         *   MMM [d]d
121         *   [d]d MMM
122         *   N.B. use non-space for MMM to allow for languages such as German which use
123         *   diacritics (e.g. umlaut) in some abbreviations.
124         *   Japanese uses numeric day and month with suffixes to distinguish them
125         *   [d]dXX [d]dZZ
126        */
127        + "("+
128            "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
129            "|(?:\\S{3}\\s+\\d{1,2})" +  // MMM [d]d
130            "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
131            "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")"+
132           ")"
133
134        + "\\s+" // separator
135
136        /*
137           year (for non-recent standard format) - yyyy
138           or time (for numeric or recent standard format) [h]h:mm
139           or Japanese year - yyyyXX
140        */
141        + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
142
143        + "\\s" // separator
144
145        + "(.*)"; // the rest (21)
146
147
148    // if true, leading spaces are trimmed from file names
149    // this was the case for the original implementation
150    final boolean trimLeadingSpaces; // package protected for access from test code
151
152    /**
153     * The default constructor for a UnixFTPEntryParser object.
154     *
155     * @throws IllegalArgumentException
156     * Thrown if the regular expression is unparseable.  Should not be seen
157     * under normal conditions.  It it is seen, this is a sign that
158     * <code>REGEX</code> is  not a valid regular expression.
159     */
160    public UnixFTPEntryParser()
161    {
162        this(null);
163    }
164
165    /**
166     * This constructor allows the creation of a UnixFTPEntryParser object with
167     * something other than the default configuration.
168     *
169     * @param config The {@link FTPClientConfig configuration} object used to
170     * configure this parser.
171     * @throws IllegalArgumentException
172     * Thrown if the regular expression is unparseable.  Should not be seen
173     * under normal conditions.  It it is seen, this is a sign that
174     * <code>REGEX</code> is  not a valid regular expression.
175     * @since 1.4
176     */
177    public UnixFTPEntryParser(final FTPClientConfig config)
178    {
179        this(config, false);
180    }
181
182    /**
183     * This constructor allows the creation of a UnixFTPEntryParser object with
184     * something other than the default configuration.
185     *
186     * @param config The {@link FTPClientConfig configuration} object used to
187     * configure this parser.
188     * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
189     * @throws IllegalArgumentException
190     * Thrown if the regular expression is unparseable.  Should not be seen
191     * under normal conditions.  It it is seen, this is a sign that
192     * <code>REGEX</code> is  not a valid regular expression.
193     * @since 3.4
194     */
195    public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces)
196    {
197        super(REGEX);
198        configure(config);
199        this.trimLeadingSpaces = trimLeadingSpaces;
200    }
201
202    /**
203     * Preparse the list to discard "total nnn" lines
204     */
205    @Override
206    public List<String> preParse(final List<String> original) {
207        final ListIterator<String> iter = original.listIterator();
208        while (iter.hasNext()) {
209            final String entry = iter.next();
210            if (entry.matches("^total \\d+$")) { // NET-389
211                iter.remove();
212            }
213        }
214        return original;
215    }
216
217    /**
218     * Parses a line of a unix (standard) FTP server file listing and converts
219     * it into a usable format in the form of an <code> FTPFile </code>
220     * instance.  If the file listing line doesn't describe a file,
221     * <code> null </code> is returned, otherwise a <code> FTPFile </code>
222     * instance representing the files in the directory is returned.
223     *
224     * @param entry A line of text from the file listing
225     * @return An FTPFile instance corresponding to the supplied entry
226     */
227    @Override
228    public FTPFile parseFTPEntry(final String entry) {
229        final FTPFile file = new FTPFile();
230        file.setRawListing(entry);
231        final int type;
232        boolean isDevice = false;
233
234        if (matches(entry))
235        {
236            final String typeStr = group(1);
237            final String hardLinkCount = group(15);
238            final String usr = group(16);
239            final String grp = group(17);
240            final String filesize = group(18);
241            final String datestr = group(19) + " " + group(20);
242            String name = group(21);
243            if (trimLeadingSpaces) {
244                name = name.replaceFirst("^\\s+", "");
245            }
246
247            try
248            {
249                if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
250                    final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
251                    jaParser.configure(new FTPClientConfig(
252                            FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
253                    file.setTimestamp(jaParser.parseTimestamp(datestr));
254                } else {
255                    file.setTimestamp(super.parseTimestamp(datestr));
256                }
257            }
258            catch (final ParseException e)
259            {
260                 // intentionally do nothing
261            }
262
263            // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
264            // 'translucent' filesystems, of which a 'union' filesystem is one.
265
266            // bcdelfmpSs-
267            switch (typeStr.charAt(0))
268            {
269            case 'd':
270                type = FTPFile.DIRECTORY_TYPE;
271                break;
272            case 'e': // NET-39 => z/OS external link
273                type = FTPFile.SYMBOLIC_LINK_TYPE;
274                break;
275            case 'l':
276                type = FTPFile.SYMBOLIC_LINK_TYPE;
277                break;
278            case 'b':
279            case 'c':
280                isDevice = true;
281                type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
282                break;
283            case 'f':
284            case '-':
285                type = FTPFile.FILE_TYPE;
286                break;
287            default: // e.g. ? and w = whiteout
288                type = FTPFile.UNKNOWN_TYPE;
289            }
290
291            file.setType(type);
292
293            int g = 4;
294            for (int access = 0; access < 3; access++, g += 4)
295            {
296                // Use != '-' to avoid having to check for suid and sticky bits
297                file.setPermission(access, FTPFile.READ_PERMISSION,
298                                   !group(g).equals("-"));
299                file.setPermission(access, FTPFile.WRITE_PERMISSION,
300                                   !group(g + 1).equals("-"));
301
302                final String execPerm = group(g + 2);
303                if (!execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)))
304                {
305                    file.setPermission(access, FTPFile.EXECUTE_PERMISSION, true);
306                }
307                else
308                {
309                    file.setPermission(access, FTPFile.EXECUTE_PERMISSION, false);
310                }
311            }
312
313            if (!isDevice)
314            {
315                try
316                {
317                    file.setHardLinkCount(Integer.parseInt(hardLinkCount));
318                }
319                catch (final NumberFormatException e)
320                {
321                    // intentionally do nothing
322                }
323            }
324
325            file.setUser(usr);
326            file.setGroup(grp);
327
328            try
329            {
330                file.setSize(Long.parseLong(filesize));
331            }
332            catch (final NumberFormatException e)
333            {
334                // intentionally do nothing
335            }
336
337            // oddball cases like symbolic links, file names
338            // with spaces in them.
339            if (type == FTPFile.SYMBOLIC_LINK_TYPE)
340            {
341
342                final int end = name.indexOf(" -> ");
343                // Give up if no link indicator is present
344                if (end == -1)
345                {
346                    file.setName(name);
347                }
348                else
349                {
350                    file.setName(name.substring(0, end));
351                    file.setLink(name.substring(end + 4));
352                }
353
354            }
355            else
356            {
357                file.setName(name);
358            }
359            return file;
360        }
361        return null;
362    }
363
364    /**
365     * Defines a default configuration to be used when this class is
366     * instantiated without a {@link  FTPClientConfig  FTPClientConfig}
367     * parameter being specified.
368     * @return the default configuration for this parser.
369     */
370    @Override
371    protected FTPClientConfig getDefaultConfiguration() {
372        return new FTPClientConfig(
373                FTPClientConfig.SYST_UNIX,
374                DEFAULT_DATE_FORMAT,
375                DEFAULT_RECENT_DATE_FORMAT);
376    }
377
378}