Data export is one of the most critical features in modern web applications. Users expect seamless access to their data in various formats, while administrators need efficient ways to extract information for reporting and analysis. I’ve spent considerable time working with Rails applications that handle massive datasets, and I’ve learned that building efficient export systems requires careful attention to memory usage, processing time, and user experience.
Streaming CSV Exports with Minimal Memory Footprint
Traditional export approaches often load entire datasets into memory before processing, causing severe performance issues with large datasets. Streaming exports solve this problem by processing data in small chunks and writing directly to the response stream.
require 'csv'

class StreamingCsvExporter
  def initialize(model_class, options = {})
    @model_class = model_class
    @batch_size = options[:batch_size] || 1000
    @filters = options[:filters] || {}
    @columns = options[:columns] || @model_class.column_names
    @headers = options[:headers] || @columns.map(&:humanize)
  end

  def stream_to_response(response)
    response.headers['Content-Type'] = 'text/csv'
    response.headers['Content-Disposition'] = "attachment; filename=\"#{filename}\""
    response.headers['Cache-Control'] = 'no-cache'

    write_headers(response)
    stream_data(response)
  end

  private

  def write_headers(response)
    response.stream.write(CSV.generate_line(@headers))
  end

  def stream_data(response)
    total_processed = 0

    query.find_in_batches(batch_size: @batch_size) do |batch|
      csv_batch = generate_csv_batch(batch)
      response.stream.write(csv_batch)

      total_processed += batch.size
      Rails.logger.info "Exported #{total_processed} records"
    end
  ensure
    response.stream.close
  end

  def generate_csv_batch(records)
    csv_string = String.new

    records.each do |record|
      row_data = @columns.map { |column| extract_column_value(record, column) }
      csv_string << CSV.generate_line(row_data)
    end

    csv_string
  end

  def extract_column_value(record, column)
    value = record.send(column)

    case value
    when Date, DateTime, Time
      value.strftime('%Y-%m-%d %H:%M:%S')
    when BigDecimal
      value.to_f
    when Hash, Array
      value.to_json
    else
      value.to_s
    end
  end

  def query
    @model_class.where(@filters).includes(association_includes)
  end

  def association_includes
    associations = []

    @columns.each do |column|
      if column.include?('.')
        association = column.split('.').first
        associations << association.to_sym
      end
    end

    associations.uniq
  end

  def filename
    timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
    "#{@model_class.name.underscore}_export_#{timestamp}.csv"
  end
end
The controller implementation demonstrates how to integrate streaming exports with Rails responses:
class ExportsController < ApplicationController
  # ActionController::Live is required for response.stream to write incrementally
  include ActionController::Live

  def users_csv
    exporter = StreamingCsvExporter.new(
      User,
      filters: filter_params,
      columns: %w[id email first_name last_name created_at status],
      batch_size: 2000
    )

    respond_to do |format|
      format.csv do
        exporter.stream_to_response(response)
      end
    end
  end

  private

  def filter_params
    filters = {}
    filters[:status] = params[:status] if params[:status].present?
    filters[:created_at] = date_range if params[:start_date].present?
    filters
  end

  def date_range
    start_date = Date.parse(params[:start_date])
    end_date = params[:end_date].present? ? Date.parse(params[:end_date]) : Date.current
    start_date..end_date
  end
end
This streaming approach maintains constant memory usage regardless of dataset size. I’ve used this technique to export millions of records while keeping memory consumption under 50MB.
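Two practical notes on wiring this up: streaming with ActionController::Live needs a threaded server such as Puma, and the endpoint only needs a CSV-format route. The path below is illustrative rather than taken from a real app:

# config/routes.rb -- illustrative route; adjust path and controller to your app
get 'exports/users', to: 'exports#users_csv', defaults: { format: :csv }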
Background Job Processing for Large Exports
Complex exports with multiple formats or extensive data processing benefit from background job processing. This approach prevents request timeouts and lets users keep working while the export is generated.
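The job below relies on a few gems beyond Rails itself. A plausible Gemfile excerpt, assuming caxlsx for the Axlsx API, prawn/prawn-table for PDFs, and aws-sdk-s3 for uploads (swap in whatever libraries your app already uses):

# Gemfile (assumed dependencies for the export formats shown below)
gem 'caxlsx'        # Axlsx API used for .xlsx generation
gem 'prawn'         # PDF generation
gem 'prawn-table'   # table layout helper used by the PDF exporter
gem 'aws-sdk-s3'    # S3 uploads for completed export files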
class BackgroundExportJob < ApplicationJob
  queue_as :exports

  def perform(export_request_id)
    @export_request = ExportRequest.find(export_request_id)
    @export_request.update!(status: 'processing', started_at: Time.current)

    begin
      export_file = generate_export
      upload_result = upload_to_storage(export_file)

      @export_request.update!(
        status: 'completed',
        completed_at: Time.current,
        file_url: upload_result[:url],
        file_size: File.size(export_file.path),
        record_count: @total_records
      )

      notify_user_completion
    rescue StandardError => e
      handle_export_failure(e)
    ensure
      cleanup_temporary_files
    end
  end

  private

  def generate_export
    case @export_request.format
    when 'csv'
      generate_csv_export
    when 'excel'
      generate_excel_export
    when 'pdf'
      generate_pdf_export
    when 'json'
      generate_json_export
    else
      raise "Unsupported format: #{@export_request.format}"
    end
  end

  def generate_csv_export
    temp_file = Tempfile.new(['export', '.csv'])
    csv = CSV.open(temp_file.path, 'w')
    write_csv_headers(csv)

    @total_records = 0

    query.find_in_batches(batch_size: 1000) do |batch|
      batch.each do |record|
        csv << format_csv_row(record)
        @total_records += 1

        update_progress if @total_records % 5000 == 0
      end
    end

    csv.close
    temp_file
  end

  def generate_excel_export
    temp_file = Tempfile.new(['export', '.xlsx'])
    package = Axlsx::Package.new
    workbook = package.workbook

    workbook.add_worksheet(name: @export_request.model_class) do |sheet|
      add_excel_headers(sheet)

      @total_records = 0

      query.find_in_batches(batch_size: 500) do |batch|
        batch.each do |record|
          sheet.add_row(format_excel_row(record))
          @total_records += 1

          update_progress if @total_records % 2000 == 0
        end
      end
    end

    package.serialize(temp_file.path)
    temp_file
  end

  def update_progress
    progress_percentage = calculate_progress_percentage

    @export_request.update!(
      progress_percentage: progress_percentage,
      processed_records: @total_records
    )

    broadcast_progress_update
  end

  def calculate_progress_percentage
    return 0 unless @export_request.estimated_record_count&.positive?

    [(@total_records.to_f / @export_request.estimated_record_count * 100).round(2), 100.0].min
  end

  def broadcast_progress_update
    ActionCable.server.broadcast(
      "export_progress_#{@export_request.user_id}",
      {
        export_id: @export_request.id,
        progress: @export_request.progress_percentage,
        processed_records: @total_records,
        status: 'processing'
      }
    )
  end

  def query
    model_class = @export_request.model_class.constantize
    base_query = model_class.all

    if @export_request.filters.present?
      @export_request.filters.each do |key, value|
        base_query = base_query.where(key => value)
      end
    end

    base_query.includes(required_associations)
  end

  def upload_to_storage(file)
    storage_key = "exports/#{@export_request.id}/#{@export_request.filename}"

    if Rails.application.config.active_storage.service == :amazon
      upload_to_s3(file, storage_key)
    else
      upload_to_local_storage(file)
    end
  end

  def upload_to_s3(file, key)
    s3_client = Aws::S3::Client.new

    s3_client.put_object(
      bucket: Rails.application.credentials.aws[:s3_bucket],
      key: key,
      body: File.open(file.path, 'rb'), # pass an IO so the file is streamed, not read into memory
      content_type: content_type_for_format
    )

    {
      url: "https://#{Rails.application.credentials.aws[:s3_bucket]}.s3.amazonaws.com/#{key}",
      storage_type: 's3'
    }
  end

  def notify_user_completion
    ExportMailer.export_ready(@export_request).deliver_now

    ActionCable.server.broadcast(
      "export_progress_#{@export_request.user_id}",
      {
        export_id: @export_request.id,
        status: 'completed',
        download_url: @export_request.file_url
      }
    )
  end
end
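Kicking off a background export then amounts to recording an ExportRequest and enqueuing the job. A rough controller sketch; the controller name, redirect path, and the use of a simple count as the estimate are illustrative assumptions:

class BackgroundExportsController < ApplicationController
  def create
    export_request = ExportRequest.create!(
      user: current_user,
      model_class: 'User',
      format: params[:format] || 'csv',
      status: 'pending',
      filters: { status: params[:status] }.compact,
      estimated_record_count: User.count # rough estimate used for progress percentages
    )

    BackgroundExportJob.perform_later(export_request.id)

    redirect_to exports_path, notice: 'Export started. You will be notified when it is ready.'
  end
end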
The ExportRequest model manages export metadata and status tracking:
class ExportRequest < ApplicationRecord
  belongs_to :user

  validates :model_class, :format, :status, presence: true
  validates :format, inclusion: { in: %w[csv excel pdf json] }
  validates :status, inclusion: { in: %w[pending processing completed failed] }

  scope :recent, -> { where('created_at > ?', 30.days.ago) }
  scope :completed, -> { where(status: 'completed') }

  def filename
    timestamp = created_at.strftime('%Y%m%d_%H%M%S')
    "#{model_class.underscore}_export_#{timestamp}.#{format_extension}"
  end

  def format_extension
    case format
    when 'excel' then 'xlsx'
    when 'pdf' then 'pdf'
    when 'json' then 'json'
    else 'csv'
    end
  end

  def estimated_duration
    return nil unless estimated_record_count

    case format
    when 'csv'
      (estimated_record_count / 10000.0).ceil.minutes
    when 'excel'
      (estimated_record_count / 5000.0).ceil.minutes
    when 'pdf'
      (estimated_record_count / 1000.0).ceil.minutes
    else
      (estimated_record_count / 8000.0).ceil.minutes
    end
  end
end
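The model implies a backing table with roughly the columns used throughout this article. A migration sketch inferred from those attributes — Rails 7 syntax and the specific column types are my assumptions:

class CreateExportRequests < ActiveRecord::Migration[7.0]
  def change
    create_table :export_requests do |t|
      t.references :user, null: false, foreign_key: true
      t.string   :model_class, null: false
      t.string   :format, null: false
      t.string   :status, null: false, default: 'pending'
      t.json     :filters
      t.integer  :estimated_record_count
      t.integer  :total_records
      t.integer  :processed_records
      t.decimal  :progress_percentage, precision: 5, scale: 2
      t.float    :processing_rate
      t.string   :file_url
      t.bigint   :file_size
      t.integer  :record_count
      t.integer  :resume_count, default: 0
      t.string   :error_message
      t.datetime :started_at, :completed_at, :failed_at, :estimated_completion
      t.float    :processing_duration
      t.timestamps
    end
  end
end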
Multi-Format Export System with Template Support
Supporting multiple export formats requires a flexible architecture that can handle different data structures and formatting requirements. I’ve developed a template-based system that allows easy customization of export formats.
class MultiFormatExporter
  SUPPORTED_FORMATS = %w[csv excel json xml pdf].freeze

  def initialize(data_source, options = {})
    @data_source = data_source
    @format = options[:format] || 'csv'
    @template = options[:template]
    @include_metadata = options[:include_metadata] || false
    @custom_fields = options[:custom_fields] || []
    @transformations = options[:transformations] || {}
  end

  def export
    validate_format
    prepare_data

    case @format
    when 'csv'
      export_csv
    when 'excel'
      export_excel
    when 'json'
      export_json
    when 'xml'
      export_xml
    when 'pdf'
      export_pdf
    end
  end

  private

  def prepare_data
    @prepared_data = @data_source.map do |record|
      base_data = extract_base_fields(record)
      custom_data = extract_custom_fields(record)
      transformed_data = apply_transformations(base_data.merge(custom_data))

      @include_metadata ? add_metadata(transformed_data, record) : transformed_data
    end
  end

  def extract_base_fields(record)
    if @template
      template_config = load_template(@template)
      extract_fields_from_template(record, template_config)
    else
      record.respond_to?(:attributes) ? record.attributes : record.to_h
    end
  end

  def extract_custom_fields(record)
    custom_data = {}

    @custom_fields.each do |field_config|
      field_name = field_config[:name]
      field_value = calculate_custom_field_value(record, field_config)
      custom_data[field_name] = field_value
    end

    custom_data
  end

  def calculate_custom_field_value(record, config)
    case config[:type]
    when 'computed'
      evaluate_computed_field(record, config[:formula])
    when 'association'
      extract_association_value(record, config[:association_path])
    when 'aggregation'
      calculate_aggregation(record, config[:aggregation_config])
    else
      record.send(config[:source_field]) if config[:source_field]
    end
  end

  def evaluate_computed_field(record, formula)
    case formula
    when 'full_name'
      "#{record.first_name} #{record.last_name}".strip
    when 'age_years'
      return nil unless record.birth_date

      ((Date.current - record.birth_date) / 365.25).floor
    when 'account_status'
      determine_account_status(record)
    else
      nil
    end
  end

  def export_excel
    temp_file = Tempfile.new(['export', '.xlsx'])
    package = Axlsx::Package.new
    workbook = package.workbook

    # Main data sheet
    workbook.add_worksheet(name: 'Data') do |sheet|
      if @prepared_data.any?
        headers = @prepared_data.first.keys
        sheet.add_row(headers.map { |header| header.to_s.humanize })

        @prepared_data.each do |row|
          formatted_row = headers.map { |header| format_excel_cell(row[header]) }
          sheet.add_row(formatted_row)
        end

        apply_excel_formatting(sheet, headers.length, @prepared_data.length)
      end
    end

    # Metadata sheet if requested
    add_metadata_sheet(workbook) if @include_metadata

    package.serialize(temp_file.path)
    temp_file
  end

  def apply_excel_formatting(sheet, column_count, row_count)
    # Header formatting
    header_style = sheet.styles.add_style(
      bg_color: '366092',
      fg_color: 'FFFFFF',
      b: true,
      alignment: { horizontal: :center }
    )
    sheet.rows.first.style = header_style

    # Alternate row coloring
    even_row_style = sheet.styles.add_style(bg_color: 'F8F9FA')
    (2..row_count + 1).each do |row_index|
      next unless row_index.even?

      sheet.rows[row_index - 1].style = even_row_style
    end

    # Auto-fit columns (nil lets the library calculate each width)
    sheet.column_widths(*Array.new(column_count, nil))
  end

  def export_json
    temp_file = Tempfile.new(['export', '.json'])

    export_data = {
      data: @prepared_data,
      export_info: {
        generated_at: Time.current.iso8601,
        record_count: @prepared_data.length,
        format: 'json'
      }
    }

    File.write(temp_file.path, JSON.pretty_generate(export_data))
    temp_file
  end

  def export_pdf
    temp_file = Tempfile.new(['export', '.pdf'])

    Prawn::Document.generate(temp_file.path) do |pdf|
      pdf.text "Data Export Report", size: 20, style: :bold
      pdf.text "Generated: #{Time.current.strftime('%B %d, %Y at %I:%M %p')}", size: 12
      pdf.move_down 20

      if @prepared_data.any?
        headers = @prepared_data.first.keys.map { |key| key.to_s.humanize }
        table_data = [headers]
        table_data += @prepared_data.map { |row| row.values.map(&:to_s) }

        pdf.table(table_data, header: true, width: pdf.bounds.width) do
          row(0).font_style = :bold
          row(0).background_color = 'DDDDDD'
          cells.padding = 5
          cells.borders = [:top, :bottom]
          cells.border_width = 0.5
        end
      end

      pdf.number_pages "Page <page> of <total>", at: [pdf.bounds.right - 150, 0]
    end

    temp_file
  end

  def load_template(template_name)
    template_path = Rails.root.join('config', 'export_templates', "#{template_name}.yml")
    YAML.load_file(template_path) if File.exist?(template_path)
  end
end
Template configuration files allow flexible field definitions:
# config/export_templates/user_summary.yml
name: "User Summary Export"
description: "Basic user information with computed fields"

fields:
  - name: "id"
    source: "id"
    type: "integer"

  - name: "full_name"
    type: "computed"
    formula: "full_name"

  - name: "email"
    source: "email"
    type: "string"

  - name: "registration_date"
    source: "created_at"
    type: "date"
    format: "%Y-%m-%d"

  - name: "account_age_days"
    type: "computed"
    formula: "account_age_days"

  - name: "total_orders"
    type: "aggregation"
    aggregation_config:
      association: "orders"
      method: "count"

  - name: "total_spent"
    type: "aggregation"
    aggregation_config:
      association: "orders"
      method: "sum"
      field: "total_amount"

formatting:
  currency_fields: ["total_spent"]
  date_fields: ["registration_date"]
  boolean_fields: ["active"]
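Putting the exporter and template together might look like the following. This is a usage sketch under my own assumptions: the scope, the extra computed field, and the association names mirror the template above rather than a real schema.

# Illustrative usage of MultiFormatExporter with the user_summary template
users = User.includes(:orders).where(status: 'active')

exporter = MultiFormatExporter.new(
  users,
  format: 'excel',
  template: 'user_summary',   # loads config/export_templates/user_summary.yml
  include_metadata: true,
  custom_fields: [
    { name: 'full_name', type: 'computed', formula: 'full_name' }
  ]
)

file = exporter.export        # returns a Tempfile in the requested format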
Progress Tracking and Real-time Updates
Large exports benefit from progress tracking that keeps users informed about processing status. I implement this with Action Cable for real-time updates, persisting progress in the database as a fallback.
class ExportProgressTracker
  def initialize(export_request)
    @export_request = export_request
    @last_update_time = Time.current
    @update_interval = 2.seconds
  end

  def update_progress(current_count, total_count = nil, additional_info = {})
    return unless should_update?

    progress_data = calculate_progress_metrics(current_count, total_count)
    progress_data.merge!(additional_info)

    update_database(progress_data)
    broadcast_update(progress_data)

    @last_update_time = Time.current
  end

  def mark_completed(final_stats = {})
    completion_data = {
      status: 'completed',
      progress_percentage: 100.0,
      completed_at: Time.current,
      processing_duration: calculate_processing_duration
    }.merge(final_stats)

    update_database(completion_data)
    broadcast_update(completion_data)
    cleanup_progress_tracking
  end

  def mark_failed(error_message, error_details = {})
    failure_data = {
      status: 'failed',
      error_message: error_message,
      failed_at: Time.current,
      processing_duration: calculate_processing_duration
    }.merge(error_details)

    update_database(failure_data)
    broadcast_update(failure_data)
    log_failure(error_message, error_details)
  end

  private

  def should_update?
    Time.current - @last_update_time >= @update_interval
  end

  def calculate_progress_metrics(current_count, total_count)
    metrics = {
      processed_records: current_count,
      updated_at: Time.current
    }

    if total_count
      metrics[:total_records] = total_count
      metrics[:progress_percentage] = calculate_percentage(current_count, total_count)
      metrics[:estimated_completion] = estimate_completion_time(current_count, total_count)
    end

    metrics[:processing_rate] = calculate_processing_rate(current_count)
    metrics
  end

  def calculate_percentage(current, total)
    return 0.0 if total.zero?

    [(current.to_f / total * 100).round(2), 100.0].min
  end

  def estimate_completion_time(current, total)
    return nil if current.zero? || total.zero? || current >= total

    elapsed_time = Time.current - @export_request.started_at
    rate = current.to_f / elapsed_time.to_f
    remaining_records = total - current

    Time.current + (remaining_records / rate).seconds
  end

  def calculate_processing_rate(current_count)
    return 0.0 unless @export_request.started_at

    elapsed_seconds = Time.current - @export_request.started_at
    return 0.0 if elapsed_seconds.zero?

    (current_count.to_f / elapsed_seconds).round(2)
  end

  def update_database(data)
    @export_request.update!(data)
  rescue ActiveRecord::RecordInvalid => e
    Rails.logger.error "Failed to update export progress: #{e.message}"
  end

  def broadcast_update(data)
    ActionCable.server.broadcast(
      "export_progress_#{@export_request.user_id}",
      format_broadcast_data(data)
    )
  rescue StandardError => e
    Rails.logger.error "Failed to broadcast export progress: #{e.message}"
  end

  def format_broadcast_data(data)
    {
      export_id: @export_request.id,
      type: 'progress_update',
      timestamp: Time.current.iso8601
    }.merge(data.stringify_keys)
  end

  def calculate_processing_duration
    return nil unless @export_request.started_at

    Time.current - @export_request.started_at
  end
end
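These broadcasts target a per-user stream name, so the client needs a matching Action Cable channel. The channel itself isn't shown above; a minimal sketch of what it could look like, assuming current_user is identified in ApplicationCable::Connection:

# app/channels/export_progress_channel.rb -- minimal sketch; the stream name must
# match the "export_progress_#{user_id}" string used by the broadcasters above
class ExportProgressChannel < ApplicationCable::Channel
  def subscribed
    # The JavaScript passes a user_id param, but the server should rely on its
    # own authenticated current_user rather than trusting the client value.
    stream_from "export_progress_#{current_user.id}"
  end
end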
The frontend JavaScript handles real-time progress updates:
class ExportProgressMonitor {
  constructor(exportId, userId) {
    this.exportId = exportId;
    this.userId = userId;
    this.progressElement = document.getElementById(`export-progress-${exportId}`);

    this.setupWebSocket();
    this.startProgressPolling();
  }

  setupWebSocket() {
    this.subscription = App.cable.subscriptions.create(
      { channel: 'ExportProgressChannel', user_id: this.userId },
      {
        received: (data) => {
          if (data.export_id === this.exportId) {
            this.updateProgressDisplay(data);
          }
        },

        connected: () => {
          console.log('Connected to export progress channel');
        },

        disconnected: () => {
          console.log('Disconnected from export progress channel');
          this.startProgressPolling();
        }
      }
    );
  }

  updateProgressDisplay(data) {
    if (!this.progressElement) return;

    const progressBar = this.progressElement.querySelector('.progress-bar');
    const statusText = this.progressElement.querySelector('.status-text');
    const detailsText = this.progressElement.querySelector('.details-text');

    if (progressBar && data.progress_percentage !== undefined) {
      progressBar.style.width = `${data.progress_percentage}%`;
      progressBar.setAttribute('aria-valuenow', data.progress_percentage);
    }

    if (statusText) {
      statusText.textContent = this.formatStatusText(data);
    }

    if (detailsText) {
      detailsText.textContent = this.formatDetailsText(data);
    }

    if (data.status === 'completed') {
      this.handleExportCompleted(data);
    } else if (data.status === 'failed') {
      this.handleExportFailed(data);
    }
  }

  formatStatusText(data) {
    switch (data.status) {
      case 'processing':
        return `Processing... ${data.progress_percentage || 0}%`;
      case 'completed':
        return 'Export completed successfully';
      case 'failed':
        return 'Export failed';
      default:
        return 'Preparing export...';
    }
  }

  formatDetailsText(data) {
    const parts = [];

    if (data.processed_records) {
      parts.push(`${data.processed_records.toLocaleString()} records processed`);
    }

    if (data.processing_rate) {
      parts.push(`${data.processing_rate} records/sec`);
    }

    if (data.estimated_completion) {
      const eta = new Date(data.estimated_completion);
      parts.push(`ETA: ${eta.toLocaleTimeString()}`);
    }

    return parts.join(' • ');
  }

  handleExportCompleted(data) {
    this.stopProgressPolling();

    if (data.download_url) {
      this.showDownloadButton(data.download_url);
    }

    this.showNotification('Export completed successfully!', 'success');
  }

  showDownloadButton(downloadUrl) {
    const downloadButton = document.createElement('a');
    downloadButton.href = downloadUrl;
    downloadButton.className = 'btn btn-success btn-sm';
    downloadButton.textContent = 'Download Export';
    downloadButton.target = '_blank';

    const buttonContainer = this.progressElement.querySelector('.button-container');
    if (buttonContainer) {
      buttonContainer.appendChild(downloadButton);
    }
  }

  startProgressPolling() {
    if (this.pollingTimer) return;

    this.pollingTimer = setInterval(() => {
      this.fetchProgressUpdate();
    }, 3000);
  }

  stopProgressPolling() {
    if (this.pollingTimer) {
      clearInterval(this.pollingTimer);
      this.pollingTimer = null;
    }
  }

  async fetchProgressUpdate() {
    try {
      const response = await fetch(`/exports/${this.exportId}/progress`);
      const data = await response.json();
      this.updateProgressDisplay(data);
    } catch (error) {
      console.error('Failed to fetch progress update:', error);
    }
  }
}
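The polling fallback hits a JSON progress endpoint that isn't shown above. A minimal sketch of what it could look like; the route, the progress action name, and the user.export_requests association are my assumptions:

# Assumed route: get 'exports/:id/progress', to: 'exports#progress'
class ExportsController < ApplicationController
  def progress
    # Scoping to the current user's requests prevents reading other users' exports
    export_request = current_user.export_requests.find(params[:id])

    render json: {
      export_id: export_request.id,
      status: export_request.status,
      progress_percentage: export_request.progress_percentage,
      processed_records: export_request.processed_records,
      download_url: export_request.file_url
    }
  end
end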
Error Recovery and Resumable Exports
Large exports can fail for many reasons, including network issues, database timeouts, and server restarts. Implementing recovery mechanisms lets exports resume from their last successful checkpoint instead of starting over.
class ResumableExportJob < ApplicationJob
  queue_as :exports

  def perform(export_request_id, resume_from = 0)
    @export_request = ExportRequest.find(export_request_id)
    @resume_from = resume_from
    @checkpoint_interval = 5000
    @progress_tracker = ExportProgressTracker.new(@export_request)

    begin
      setup_export_context
      perform_resumable_export
      finalize_successful_export
    rescue StandardError => e
      handle_export_failure(e)
    ensure
      cleanup_resources
    end
  end

  private

  def setup_export_context
    @export_request.update!(
      status: 'processing',
      started_at: @resume_from.zero? ? Time.current : @export_request.started_at,
      resume_count: @export_request.resume_count + (@resume_from.positive? ? 1 : 0)
    )

    @temp_file = initialize_export_file
    @total_records = estimate_total_records
    @processed_count = @resume_from
  end

  def perform_resumable_export
    query = build_resumable_query

    # Note: find_in_batches' :start option expects a primary key value, so the
    # checkpoint should record the last processed id rather than a row count.
    query.find_in_batches(batch_size: batch_size, start: @resume_from) do |batch|
      process_batch_with_checkpoints(batch)
      break if export_cancelled?
    end
  end

  def process_batch_with_checkpoints(batch)
    batch.each_with_index do |record, index|
      process_single_record(record)
      @processed_count += 1

      if should_create_checkpoint?
        create_checkpoint
        @progress_tracker.update_progress(@processed_count, @total_records)
      end
    end
  end

  def process_single_record(record)
    formatted_data = format_record_for_export(record)
    write_record_to_file(formatted_data)
  rescue StandardError => e
    handle_record_processing_error(record, e)
  end

  def should_create_checkpoint?
    @processed_count % @checkpoint_interval == 0
  end

  def create_checkpoint
    checkpoint_data = {
      export_request_id: @export_request.id,
      processed_count: @processed_count,
      checkpoint_created_at: Time.current,
      temp_file_path: @temp_file.path,
      current_batch_id: @current_batch_id
    }

    ExportCheckpoint.create!(checkpoint_data)

    # Keep only the 3 most recent checkpoints
    cleanup_old_checkpoints
  end

  def cleanup_old_checkpoints
    old_checkpoints = ExportCheckpoint
                      .where(export_request_id: @export_request.id)
                      .order(created_at: :desc)
                      .offset(3)

    old_checkpoints.destroy_all
  end

  def handle_export_failure(error)
    create_failure_checkpoint(error)

    if should_retry_export?(error)
      schedule_retry
    else
      mark_export_as_faile