Class TextFileInputUtils


  • public class TextFileInputUtils
    extends Object
    Some common methods for text file parsing.
    Author:
    Alexander Buloichik
    • Constructor Detail

      • TextFileInputUtils

        public TextFileInputUtils()
    • Method Detail

      • guessStringsFromLine

        public static final String[] guessStringsFromLine​(org.pentaho.di.core.variables.VariableSpace space,
                                                          org.pentaho.di.core.logging.LogChannelInterface log,
                                                          String line,
                                                          TextFileInputMeta inf,
                                                          String delimiter,
                                                          String enclosure,
                                                          String escapeCharacter)
                                                   throws org.pentaho.di.core.exception.KettleException
        Throws:
        org.pentaho.di.core.exception.KettleException
      • getLine

        public static final String getLine​(org.pentaho.di.core.logging.LogChannelInterface log,
                                           BufferedInputStreamReader reader,
                                           int formatNr,
                                           StringBuilder line)
                                    throws org.pentaho.di.core.exception.KettleFileException
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • getLine

        public static final String getLine​(org.pentaho.di.core.logging.LogChannelInterface log,
                                           BufferedInputStreamReader reader,
                                           EncodingType encodingType,
                                           int fileFormatType,
                                           StringBuilder line,
                                           String regex)
                                    throws org.pentaho.di.core.exception.KettleFileException
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • getLine

        public static final String getLine​(org.pentaho.di.core.logging.LogChannelInterface log,
                                           BufferedInputStreamReader reader,
                                           EncodingType encodingType,
                                           int fileFormatType,
                                           StringBuilder line,
                                           String regex,
                                           String escapeChar)
                                    throws org.pentaho.di.core.exception.KettleFileException
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • getLine

        public static final TextFileLine getLine​(org.pentaho.di.core.logging.LogChannelInterface log,
                                                 BufferedInputStreamReader reader,
                                                 EncodingType encodingType,
                                                 int fileFormatType,
                                                 StringBuilder line,
                                                 String regex,
                                                 long lineNumberInFile)
                                          throws org.pentaho.di.core.exception.KettleFileException
        Returns a line in the first position; in the second position, how many lines were read from the file to get a full line
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • getLine

        public static final TextFileLine getLine​(org.pentaho.di.core.logging.LogChannelInterface log,
                                                 BufferedInputStreamReader reader,
                                                 EncodingType encodingType,
                                                 int fileFormatType,
                                                 StringBuilder line,
                                                 String regex,
                                                 String escapeChar,
                                                 long lineNumberInFile)
                                          throws org.pentaho.di.core.exception.KettleFileException
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • getLine

        public static final String getLine​(org.pentaho.di.core.logging.LogChannelInterface log,
                                           BufferedInputStreamReader reader,
                                           EncodingType encodingType,
                                           int formatNr,
                                           StringBuilder line)
                                    throws org.pentaho.di.core.exception.KettleFileException
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • convertLineToRow

        public static final Object[] convertLineToRow​(org.pentaho.di.core.logging.LogChannelInterface log,
                                                      TextFileLine textFileLine,
                                                      TextFileInputMeta info,
                                                      Object[] passThruFields,
                                                      int nrPassThruFields,
                                                      org.pentaho.di.core.row.RowMetaInterface outputRowMeta,
                                                      org.pentaho.di.core.row.RowMetaInterface convertRowMeta,
                                                      String fname,
                                                      long rowNr,
                                                      String delimiter,
                                                      String enclosure,
                                                      String escapeCharacter,
                                                      FileErrorHandler errorHandler,
                                                      BaseFileInputAdditionalField additionalOutputFields,
                                                      String shortFilename,
                                                      String path,
                                                      boolean hidden,
                                                      Date modificationDateTime,
                                                      String uri,
                                                      String rooturi,
                                                      String extension,
                                                      Long size)
                                               throws org.pentaho.di.core.exception.KettleException
        Throws:
        org.pentaho.di.core.exception.KettleException
      • convertLineToRow

        public static final Object[] convertLineToRow​(org.pentaho.di.core.logging.LogChannelInterface log,
                                                      TextFileLine textFileLine,
                                                      TextFileInputMeta info,
                                                      Object[] passThruFields,
                                                      int nrPassThruFields,
                                                      org.pentaho.di.core.row.RowMetaInterface outputRowMeta,
                                                      org.pentaho.di.core.row.RowMetaInterface convertRowMeta,
                                                      String fname,
                                                      long rowNr,
                                                      String delimiter,
                                                      String enclosure,
                                                      String escapeCharacter,
                                                      FileErrorHandler errorHandler,
                                                      BaseFileInputAdditionalField additionalOutputFields,
                                                      String shortFilename,
                                                      String path,
                                                      boolean hidden,
                                                      Date modificationDateTime,
                                                      String uri,
                                                      String rooturi,
                                                      String extension,
                                                      Long size,
                                                      boolean failOnParseError)
                                               throws org.pentaho.di.core.exception.KettleException
        Parameters:
        failOnParseError - if set to true, parsing failure on any line will cause parsing to be terminated; when set to false, parsing failure on a given line will not prevent remaining lines from being parsed - this allows us to analyze fields, even if some field is mis-configured and causes a parsing error for the values of that field.
        Throws:
        org.pentaho.di.core.exception.KettleException
      • convertLineToStrings

        public static final String[] convertLineToStrings​(org.pentaho.di.core.logging.LogChannelInterface log,
                                                          String line,
                                                          TextFileInputMeta inf,
                                                          String delimiter,
                                                          String enclosure,
                                                          String escapeCharacters)
                                                   throws org.pentaho.di.core.exception.KettleException
        Throws:
        org.pentaho.di.core.exception.KettleException
      • checkPattern

        public static int checkPattern​(String text,
                                       String regexChar)
        Finds a pattern within a String and returns the number of occurrences
        Parameters:
        text - String to be evaluated
        regexChar - String regexChar
        Returns:
        the number of pattern occurrences
      • checkPattern

        public static int checkPattern​(String text,
                                       String regexChar,
                                       String escapeCharacter)
        Finds a pattern within a String and returns the number of occurrences
        Parameters:
        text - String to be evaluated
        regexChar - String regexChar
        escapeCharacter - String escapeCharacter; an empty string will be ignored
        Returns:
        the number of pattern occurrences
      • skipLines

        public static long skipLines​(org.pentaho.di.core.logging.LogChannelInterface log,
                                     BufferedInputStreamReader reader,
                                     EncodingType encodingType,
                                     int fileFormatType,
                                     StringBuilder line,
                                     int nrLinesToSkip,
                                     String regex,
                                     long lineNumberInFile)
                              throws org.pentaho.di.core.exception.KettleFileException
        Returns the line number in the file
        Throws:
        org.pentaho.di.core.exception.KettleFileException
      • skipLines

        public static long skipLines​(org.pentaho.di.core.logging.LogChannelInterface log,
                                     BufferedInputStreamReader reader,
                                     EncodingType encodingType,
                                     int fileFormatType,
                                     StringBuilder line,
                                     int nrLinesToSkip,
                                     String regex,
                                     String escapeChar,
                                     long lineNumberInFile)
                              throws org.pentaho.di.core.exception.KettleFileException
        Throws:
        org.pentaho.di.core.exception.KettleFileException