void QgsDelimitedTextProvider::scanFile( bool buildIndexes )
{
  QStringList messages;

  // assume the layer is invalid until proven otherwise

  mLayerValid = false;
  mValid = false;
  mRescanRequired = false;

  clearInvalidLines();

  // Initiallize indexes

  resetIndexes();
  bool buildSpatialIndex = buildIndexes && nullptr != mSpatialIndex;

  // No point building a subset index if there is no geometry, as all
  // records will be included.

  bool buildSubsetIndex = buildIndexes && mBuildSubsetIndex && mGeomRep != GeomNone;

  if ( ! mFile->isValid() )
  {
    // uri is invalid so the layer must be too...

    messages.append( tr( "File cannot be opened or delimiter parameters are not valid" ) );
    reportErrors( messages );
    QgsDebugMsg( "Delimited text source invalid - filename or delimiter parameters" );
    return;
  }

  // Open the file and get number of rows, etc. We assume that the
  // file has a header row and process accordingly. Caller should make
  // sure that the delimited file is properly formed.

  if ( mGeomRep == GeomAsWkt )
  {
    mWktFieldIndex = mFile->fieldIndex( mWktFieldName );
    if ( mWktFieldIndex < 0 )
    {
      messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( QStringLiteral( "Wkt" ), mWktFieldName ) );
    }
  }
  else if ( mGeomRep == GeomAsXy )
  {
    mXFieldIndex = mFile->fieldIndex( mXFieldName );
    mYFieldIndex = mFile->fieldIndex( mYFieldName );
    if ( mXFieldIndex < 0 )
    {
      messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( QStringLiteral( "X" ), mWktFieldName ) );
    }
    if ( mYFieldIndex < 0 )
    {
      messages.append( tr( "%0 field %1 is not defined in delimited text file" ).arg( QStringLiteral( "Y" ), mWktFieldName ) );
    }
  }
  if ( !messages.isEmpty() )
  {
    reportErrors( messages );
    QgsDebugMsg( "Delimited text source invalid - missing geometry fields" );
    return;
  }

  // Scan the entire file to determine
  // 1) the number of fields (this is handled by QgsDelimitedTextFile mFile
  // 2) the number of valid features.  Note that the selection of valid features
  //    should match the code in QgsDelimitedTextFeatureIterator
  // 3) the geometric extents of the layer
  // 4) the type of each field
  //
  // Also build subset and spatial indexes.

  QStringList parts;
  long nEmptyRecords = 0;
  long nBadFormatRecords = 0;
  long nIncompatibleGeometry = 0;
  long nInvalidGeometry = 0;
  long nEmptyGeometry = 0;
  mNumberFeatures = 0;
  mExtent = QgsRectangle();

  QList<bool> isEmpty;
  QList<bool> couldBeInt;
  QList<bool> couldBeLongLong;
  QList<bool> couldBeDouble;
  bool foundFirstGeometry = false;

  while ( true )
  {
    QgsDelimitedTextFile::Status status = mFile->nextRecord( parts );
    if ( status == QgsDelimitedTextFile::RecordEOF )
      break;
    if ( status != QgsDelimitedTextFile::RecordOk )
    {
      nBadFormatRecords++;
      recordInvalidLine( tr( "Invalid record format at line %1" ) );
      continue;
    }
    // Skip over empty records
    if ( recordIsEmpty( parts ) )
    {
      nEmptyRecords++;
      continue;
    }

    // Check geometries are valid
    bool geomValid = true;

    if ( mGeomRep == GeomAsWkt )
    {
      if ( mWktFieldIndex >= parts.size() || parts[mWktFieldIndex].isEmpty() )
      {
        nEmptyGeometry++;
        mNumberFeatures++;
      }
      else
      {
        // Get the wkt - confirm it is valid, get the type, and
        // if compatible with the rest of file, add to the extents

        QString sWkt = parts[mWktFieldIndex];
        QgsGeometry geom;
        if ( !mWktHasPrefix && sWkt.indexOf( sWktPrefixRegexp ) >= 0 )
          mWktHasPrefix = true;
        geom = geomFromWkt( sWkt, mWktHasPrefix );

        if ( !geom.isNull() )
        {
          QgsWkbTypes::Type type = geom.wkbType();
          if ( type != QgsWkbTypes::NoGeometry )
          {
            if ( mGeometryType == QgsWkbTypes::UnknownGeometry || geom.type() == mGeometryType )
            {
              mGeometryType = geom.type();
              if ( !foundFirstGeometry )
              {
                mNumberFeatures++;
                mWkbType = type;
                mExtent = geom.boundingBox();
                foundFirstGeometry = true;
              }
              else
              {
                mNumberFeatures++;
                if ( geom.isMultipart() )
                  mWkbType = type;
                QgsRectangle bbox( geom.boundingBox() );
                mExtent.combineExtentWith( bbox );
              }
              if ( buildSpatialIndex )
              {
                QgsFeature f;
                f.setId( mFile->recordId() );
                f.setGeometry( geom );
                mSpatialIndex->insertFeature( f );
              }
            }
            else
            {
              nIncompatibleGeometry++;
              geomValid = false;
            }
          }
        }
        else
        {
          geomValid = false;
          nInvalidGeometry++;
          recordInvalidLine( tr( "Invalid WKT at line %1" ) );
        }
      }
    }
    else if ( mGeomRep == GeomAsXy )
    {
      // Get the x and y values, first checking to make sure they
      // aren't null.

      QString sX = mXFieldIndex < parts.size() ? parts[mXFieldIndex] : QString();
      QString sY = mYFieldIndex < parts.size() ? parts[mYFieldIndex] : QString();
      if ( sX.isEmpty() && sY.isEmpty() )
      {
        nEmptyGeometry++;
        mNumberFeatures++;
      }
      else
      {
        QgsPointXY pt;
        bool ok = pointFromXY( sX, sY, pt, mDecimalPoint, mXyDms );

        if ( ok )
        {
          if ( foundFirstGeometry )
          {
            mExtent.combineExtentWith( pt.x(), pt.y() );
          }
          else
          {
            // Extent for the first point is just the first point
            mExtent.set( pt.x(), pt.y(), pt.x(), pt.y() );
            mWkbType = QgsWkbTypes::Point;
            mGeometryType = QgsWkbTypes::PointGeometry;
            foundFirstGeometry = true;
          }
          mNumberFeatures++;
          if ( buildSpatialIndex && std::isfinite( pt.x() ) && std::isfinite( pt.y() ) )
          {
            QgsFeature f;
            f.setId( mFile->recordId() );
            f.setGeometry( QgsGeometry::fromPointXY( pt ) );
            mSpatialIndex->insertFeature( f );
          }
        }
        else
        {
          geomValid = false;
          nInvalidGeometry++;
          recordInvalidLine( tr( "Invalid X or Y fields at line %1" ) );
        }
      }
    }
    else
    {
      mWkbType = QgsWkbTypes::NoGeometry;
      mNumberFeatures++;
    }

    if ( !geomValid )
      continue;

    if ( buildSubsetIndex )
      mSubsetIndex.append( mFile->recordId() );


    // If we are going to use this record, then assess the potential types of each column

    for ( int i = 0; i < parts.size(); i++ )
    {

      QString &value = parts[i];
      // Ignore empty fields - spreadsheet generated CSV files often
      // have random empty fields at the end of a row
      if ( value.isEmpty() )
        continue;

      // Expand the columns to include this non empty field if necessary

      while ( couldBeInt.size() <= i )
      {
        isEmpty.append( true );
        couldBeInt.append( false );
        couldBeLongLong.append( false );
        couldBeDouble.append( false );
      }

      // If this column has been empty so far then initiallize it
      // for possible types

      if ( isEmpty[i] )
      {
        isEmpty[i] = false;
        couldBeInt[i] = true;
        couldBeLongLong[i] = true;
        couldBeDouble[i] = true;
      }

      if ( ! mDetectTypes )
      {
        continue;
      }

      // Now test for still valid possible types for the field
      // Types are possible until first record which cannot be parsed

      if ( couldBeInt[i] )
      {
        value.toInt( &couldBeInt[i] );
      }

      if ( couldBeLongLong[i] && ! couldBeInt[i] )
      {
        value.toLongLong( &couldBeLongLong[i] );
      }

      if ( couldBeDouble[i] && ! couldBeLongLong[i] )
      {
        if ( ! mDecimalPoint.isEmpty() )
        {
          value.replace( mDecimalPoint, QLatin1String( "." ) );
        }
        value.toDouble( &couldBeDouble[i] );
      }
    }
  }

  // Now create the attribute fields.  Field types are integer by preference,
  // failing that double, failing that text.

  QStringList fieldNames = mFile->fieldNames();
  mFieldCount = fieldNames.size();
  attributeColumns.clear();
  attributeFields.clear();

  QString csvtMessage;
  QStringList csvtTypes = readCsvtFieldTypes( mFile->fileName(), &csvtMessage );

  for ( int i = 0; i < fieldNames.size(); i++ )
  {
    // Skip over WKT field ... don't want to display in attribute table
    if ( i == mWktFieldIndex )
      continue;

    // Add the field index lookup for the column
    attributeColumns.append( i );
    QVariant::Type fieldType = QVariant::String;
    QString typeName = QStringLiteral( "text" );
    if ( i < csvtTypes.size() )
    {
      typeName = csvtTypes[i];
    }
    else if ( mDetectTypes && i < couldBeInt.size() )
    {
      if ( couldBeInt[i] )
      {
        typeName = QStringLiteral( "integer" );
      }
      else if ( couldBeLongLong[i] )
      {
        typeName = QStringLiteral( "longlong" );
      }
      else if ( couldBeDouble[i] )
      {
        typeName = QStringLiteral( "double" );
      }
    }
    if ( typeName == QStringLiteral( "integer" ) )
    {
      fieldType = QVariant::Int;
    }
    else if ( typeName == QStringLiteral( "longlong" ) )
    {
      fieldType = QVariant::LongLong;
    }
    else if ( typeName == QStringLiteral( "real" ) || typeName == QStringLiteral( "double" ) )
    {
      typeName = QStringLiteral( "double" );
      fieldType = QVariant::Double;
    }
    else
    {
      typeName = QStringLiteral( "text" );
    }
    attributeFields.append( QgsField( fieldNames[i], fieldType, typeName ) );
  }


  QgsDebugMsg( "Field count for the delimited text file is " + QString::number( attributeFields.size() ) );
  QgsDebugMsg( "geometry type is: " + QString::number( mWkbType ) );
  QgsDebugMsg( "feature count is: " + QString::number( mNumberFeatures ) );

  QStringList warnings;
  if ( ! csvtMessage.isEmpty() )
    warnings.append( csvtMessage );
  if ( nBadFormatRecords > 0 )
    warnings.append( tr( "%1 records discarded due to invalid format" ).arg( nBadFormatRecords ) );
  if ( nEmptyGeometry > 0 )
    warnings.append( tr( "%1 records have missing geometry definitions" ).arg( nEmptyGeometry ) );
  if ( nInvalidGeometry > 0 )
    warnings.append( tr( "%1 records discarded due to invalid geometry definitions" ).arg( nInvalidGeometry ) );
  if ( nIncompatibleGeometry > 0 )
    warnings.append( tr( "%1 records discarded due to incompatible geometry types" ).arg( nIncompatibleGeometry ) );

  reportErrors( warnings );

  // Decide whether to use subset ids to index records rather than simple iteration through all
  // If more than 10% of records are being skipped, then use index.  (Not based on any experimentation,
  // could do with some analysis?)

  if ( buildSubsetIndex )
  {
    long recordCount = mFile->recordCount();
    recordCount -= recordCount / SUBSET_ID_THRESHOLD_FACTOR;
    mUseSubsetIndex = mSubsetIndex.size() < recordCount;
    if ( ! mUseSubsetIndex )
      mSubsetIndex = QList<quintptr>();
  }

  mUseSpatialIndex = buildSpatialIndex;

  mValid = mGeometryType != QgsWkbTypes::UnknownGeometry;
  mLayerValid = mValid;

  // If it is valid, then watch for changes to the file
  connect( mFile.get(), &QgsDelimitedTextFile::fileUpdated, this, &QgsDelimitedTextProvider::onFileUpdated );
}