Search_csv_data.rb
This content was produced by an LLM and could include errors.
This script reads a CSV file, indexing unique records by a specified key. It then checks a list of target keys to detect duplicates and retrieve associated data entries from the dataset.
# frozen_string_literal: true
#=> nil
# Look up records in CSV data and flag duplicates
#=> nil
require 'csv'
#=> true
# Store the location of each unique key in the data
#=> nil
#
#=> nil
# @param data [Array<Array<String>>] The data to search through
#=> nil
# @param key_index [Integer] The column position within each row whose value is used as the lookup key
#=> nil
# @return [Hash<String, Integer>] A hash mapping each key to its row index in the data; a key that occurs more than once maps to 0
#=> nil
def store_data_location(data:, key_index:)
  # Build { key => row position } in a single pass over the rows.
  # A key seen more than once is stored as 0, which callers read as
  # "duplicated"; once a key is 0 it stays 0 for any further repeats.
  data.each_with_index.each_with_object({}) do |(row, position), locations|
    # to_s turns a missing cell (nil) into the empty-string key.
    lookup_key = row[key_index].to_s
    locations[lookup_key] = locations.key?(lookup_key) ? 0 : position
  end
end
#=> :store_data_location
# Search for the location of each unique key in the data
#=> nil
#
#=> nil
# @param csv_file_path [String] The path to the CSV file to search
#=> nil
# @param target_key [String] The header name of the column whose values are used as lookup keys
#=> nil
# @return [Hash<String, Integer>] A hash mapping each value in that column to its row index (the header row is row 0); a value that occurs more than once maps to 0
#=> nil
def search_data_location(csv_file_path: 'sample.csv',
                         target_key: 'id')
  # Reads the whole CSV, finds the column whose header equals target_key,
  # and indexes every row (header row included) by that column's value.
  #
  # Raises ArgumentError when the file has no rows or when no header cell
  # equals target_key, instead of passing a nil column index downstream.
  rows = CSV.read(csv_file_path)
  headers = rows.first
  raise ArgumentError, "#{csv_file_path} has no header row" if headers.nil?

  # Header name => column position, e.g. {'id' => 0, 'name' => 1, ...}.
  # If a header name is repeated, the last matching column is used.
  column_positions = headers.each_with_index.to_h
  key_column = column_positions.fetch(target_key) do
    raise ArgumentError,
          "column #{target_key.inspect} not found in #{csv_file_path} (headers: #{headers.inspect})"
  end

  store_data_location(data: rows, key_index: key_column)
end
#=> :search_data_location
def main
  # Demo: index sample.csv by its 'email' column, then look up a few
  # addresses and print the matching row or a status message.

  # search preparation
  ## build the key => row-index lookup beforehand
  data_index = search_data_location(csv_file_path: 'sample.csv', target_key: 'email')
  ## load the CSV rows into memory so an index can be turned back into a row
  csv_data_arr = CSV.read('sample.csv')

  # example: search
  puts '=== SEARCH ==='
  keys_for_search = ['john@example.com', 'ken@example.com', 'maria@example.com']
  keys_for_search.each do |key_for_search|
    location_index = data_index[key_for_search]
    if location_index.nil?
      # The key is absent from the index; indexing the rows with nil
      # would raise TypeError, so report it instead.
      # TODO: modify as needed
      p "#{key_for_search} is not found."
    elsif location_index.eql?(0)
      # 0 is the sentinel the index stores for keys seen more than once.
      # TODO: modify as needed
      p "#{key_for_search} is duplicated."
    else
      # TODO: modify as needed
      p csv_data_arr[location_index]
    end
  end
end
#=> :main
# Run the demo only when this file is executed directly, not when it is loaded by another file.
main if $PROGRAM_NAME == __FILE__
#=> nil
Ruby 4.0.3