Search_csv_data.rb

This content was produced by an LLM and could include errors.

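This script reads a CSV file and builds an index that maps each value of a chosen column to the row in which it occurs, marking values that occur more than once as duplicates. It then looks up a list of target keys in that index, reporting duplicated keys and printing the matching row for the others.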

# frozen_string_literal: true
#=> nil

# Look up records in CSV data and flag duplicates
#=> nil

require 'csv'
#=> true

# Build a lookup table from key value to row position.
#
# The value found at +key_index+ in each row is converted with +to_s+ and used
# as the hash key. The first time a key is seen it is mapped to that row's
# position; every later occurrence overwrites the entry with 0. Because 0 is
# also a real position, a key that occurs only in row 0 looks the same as a
# duplicated key.
#
# @param data [Array<Array<String>>] rows to index
# @param key_index [Integer] position of the key inside each row
# @return [Hash<String, Integer>] key => row position, or 0 for keys seen more than once
def store_data_location(data:, key_index:)
  data.each_with_index.each_with_object({}) do |(row, position), locations|
    key = row[key_index].to_s

    # 0 doubles as the "duplicated" marker.
    locations[key] = locations.key?(key) ? 0 : position
  end
end
#=> :store_data_location

# Index the rows of a CSV file by the values of one column.
#
# The first row of the file is treated as the header row and is used to find
# the position of the +target_key+ column. Every row, the header row included,
# is then handed to +store_data_location+ together with that position.
#
# @param csv_file_path [String] path of the CSV file to read
# @param target_key [String] header name of the column whose values become the lookup keys
# @return [Hash<String, Integer>] column value => row index, with 0 for values
#   that occur in more than one row (as produced by +store_data_location+)
# @raise [ArgumentError] if the file contains no rows, or no header equals +target_key+
def search_data_location(csv_file_path: 'sample.csv',
                         target_key: 'id')
  csv_data_arr = CSV.read(csv_file_path)

  headers = csv_data_arr.first
  raise ArgumentError, "#{csv_file_path} has no header row" if headers.nil?

  # rindex: when a header name is repeated, the last matching column is used.
  key_column_index_number = headers.rindex(target_key)
  if key_column_index_number.nil?
    raise ArgumentError,
          "column #{target_key.inspect} not found in #{csv_file_path} (headers: #{headers.inspect})"
  end

  store_data_location(data: csv_data_arr, key_index: key_column_index_number)
end
#=> :search_data_location

# Demo entry point: index sample.csv by its 'email' column and look up a fixed
# list of addresses.
#
# For each address it prints one of three things: a "duplicated" notice when the
# index holds the 0 marker, a "not found" notice when the index has no entry for
# the address, or the CSV row stored at the indexed position.
def main
  csv_file_path = 'sample.csv'

  # search preparation
  ## find the index beforehand
  data_index = search_data_location(csv_file_path: csv_file_path, target_key: 'email')
  ## search_data_location returns only the index, so the rows are loaded again
  ## here in order to print the matching records.
  csv_data_arr = CSV.read(csv_file_path)

  # example: search
  puts '=== SEARCH ==='
  keys_for_search = ['john@example.com', 'ken@example.com', 'maria@example.com']
  keys_for_search.each do |key_for_search|
    location_index = data_index[key_for_search]
    if location_index.nil?
      # Key absent from the index; indexing the rows with nil would raise TypeError.
      # TODO: modify as needed
      p "#{key_for_search} is not found."
    elsif location_index.zero?
      # TODO: modify as needed
      p "#{key_for_search} is duplicated."
    else
      # TODO: modify as needed
      p csv_data_arr[location_index]
    end
  end
end
#=> :main

# Run the demo only when this file is executed directly, not when it is loaded by another file.
main if $PROGRAM_NAME == __FILE__
#=> nil

Ruby 4.0.3