我有一个包含许多字段的 CSV 文件,我想检查某些字段数据的组合是否已经存在。例如,开始日期(Start Date)、结束日期(End Date)、区域(Zone)、包(Package)、工会(Local Union)这几个字段的组合在 CSV 中不应重复出现。我是在 CakePHP 4 中实现的。
我使用 in_array 检查数据。当数据随机分布时,它有时能正常工作;但当数据是有序的时候,即使实际数据并不重复,它也会提示该数据已存在于文件中。
/**
 * Reads a CSV file row by row and records rows it considers duplicates of
 * earlier rows, based on the Start Date, End Date, Local Union, Package and
 * Zone columns.
 *
 * NOTE(review): the duplicate test below checks every field against its own
 * list of previously seen values. A row is therefore flagged as soon as each
 * of its five values has appeared in ANY earlier row, not necessarily together
 * in the same row, which produces false positives.
 *
 * @param string $tmp_filename Path handed to fopen().
 * @return mixed On open failure, whatever $this->redirect() returns. Otherwise
 *               the code builds an array with the keys 'invalid_row_count' and
 *               'invalid_rows', but print_r()/die run first, so the final
 *               return statement is never reached.
 */
public function validateWagerows($tmp_filename) {
$invalid_rows = array();
// This array value is overwritten with 0 on the very next line.
$invalid_row_count =array();
$invalid_row_count = 0;
// $not_found_count, $wr and $not_found are never used again in this method.
$not_found_count = 0;
// One list of previously seen values per column.
$unionnumbers=array();
$sd=array();
$ed=array();
$pck=array();
$zd=array();
$wr=array();
$not_found=array();
// open the file
$file = fopen($tmp_filename, "r");
if($file === false) {
$this->Flash->error('Failed to open .csv file');
return $this->redirect(['action' => 'upload']);
} else {
// The first line is treated as the header row.
$header = fgetcsv($file);
while(($row = fgetcsv($file)) !== FALSE) {
// Pick out the columns of interest by header name; a missing cell becomes ''.
foreach ($header as $k => $head) {
if($head == 'Start Date') {
$start_dates = (isset($row[$k])) ? trim($row[$k]) : '';
}
else if($head == 'End Date') {
$end_dates = (isset($row[$k])) ? trim($row[$k]) : '';
}
else if($head == 'Local Union') {
$local_union_ids= (isset($row[$k])) ? trim($row[$k]) : '';
}
else if($head == 'Package') {
$package_id = (isset($row[$k])) ? trim($row[$k]) : '';
}
else if($head === 'Zone') {
$zones_id = (isset($row[$k])) ? trim($row[$k]) : '';
}
}
// Each field is looked up independently in its own list, so this is true
// whenever all five values were seen before, even if in different rows.
if (in_array($start_dates,$sd) && in_array($end_dates,$ed) && in_array($local_union_ids, $unionnumbers) && in_array($zones_id,$zd) && in_array($package_id,$pck) )
{
$invalid_rows[$invalid_row_count]['same-data'] = "No Two or more rows with same Zone + Package +Unions within a Start and End Date can Exist in Csv ";
$result = ['invalid_row_count' => $invalid_row_count, 'invalid_rows' => $invalid_rows];
// $true is not initialised anywhere in this method.
$true=$true+1;
}
// Remember the current row's values for the checks on later rows.
$unionnumbers[]=$local_union_ids;
$sd[]=$start_dates;
$ed[]=$end_dates;
$pck[]=$package_id;
$zd[]=$zones_id;
// Incremented for every data row, so this acts as a zero-based row index
// and ends up equal to the number of rows read, not the number of invalid rows.
$invalid_row_count++;
}
fclose($file);
$result = ['invalid_row_count' => $invalid_row_count, 'invalid_rows' => $invalid_rows];
// Debug output; die stops the script, so the return below is unreachable.
print_r($result);
die;
return $result;
}
}
如果有人能帮我弄清楚问题所在,或者分享现成的脚本,将会非常有帮助。
识别重复记录的常用策略是散列。原代码对每个字段分别使用 in_array,只要各个值曾在任意先前的行中出现过就会被判定为重复,即使这些值并不来自同一行。您可以为要唯一标识的一组值创建一个编码表示(散列),将这些散列存储在缓冲区中,然后检查当前记录的散列是否已在缓冲区中。这简化了逻辑,并且比其他方法更高效。
<?php
class Test
{
    /**
     * Check a CSV file for data rows that repeat an earlier row's combination
     * of Start Date, End Date, Local Union, Zone and Package.
     *
     * The first line of the file is treated as the header; columns are located
     * by their exact header text. A column that is absent from the header, or
     * a cell that is absent from a row, is treated as an empty string. Values
     * are trimmed before comparison.
     *
     * Rows are numbered from 0 starting at the first line after the header.
     * Blank lines are counted in that numbering but are never reported as
     * duplicates.
     *
     * @param string $tmp_filename Path of the CSV file to read.
     * @return mixed On success, an array with two keys:
     *               - 'invalid_row_count': the number of rows read after the
     *                 header (kept as-is for backward compatibility; it is
     *                 NOT the number of invalid rows),
     *               - 'invalid_rows': map of zero-based row number to
     *                 ['same-data' => message] for every duplicate row.
     *               If the file cannot be opened, the result of
     *               $this->redirect() is returned instead.
     */
    public function validateWagerows($tmp_filename)
    {
        $invalid_rows = [];
        $invalid_row_count = 0;
        // Fingerprints of the rows seen so far, stored as array keys so that
        // the membership test is a constant-time isset() instead of a scan.
        $seen = [];

        // open the file
        $file = fopen($tmp_filename, 'r');
        if ($file === false) {
            // NOTE(review): Flash and redirect() are not defined on this class;
            // this branch presumably relies on the method living in a CakePHP
            // controller. Confirm before using the class standalone.
            $this->Flash->error('Failed to open .csv file');
            return $this->redirect(['action' => 'upload']);
        }

        // Column positions of the fields that make up the uniqueness key.
        // The order here fixes the order of the values inside the fingerprint.
        $columns = [
            'Start Date' => null,
            'End Date' => null,
            'Local Union' => null,
            'Zone' => null,
            'Package' => null,
        ];

        // Resolve the header once instead of re-scanning it for every row.
        // If a header name occurs more than once, the last occurrence wins.
        $header = fgetcsv($file);
        if (is_array($header)) {
            foreach ($header as $index => $name) {
                if (is_string($name) && array_key_exists($name, $columns)) {
                    $columns[$name] = $index;
                }
            }
        }

        while (($row = fgetcsv($file)) !== false) {
            // fgetcsv() reports a blank line as a single null field. Such a
            // line is not a record, so keep the numbering but skip the check.
            if ($row === [null]) {
                $invalid_row_count++;
                continue;
            }

            $values = [];
            foreach ($columns as $index) {
                $values[] = ($index !== null && isset($row[$index])) ? trim($row[$index]) : '';
            }

            // Serialise the values before hashing so field boundaries are
            // preserved: plain concatenation would make ('1', '23') and
            // ('12', '3') indistinguishable and report false duplicates.
            $currKey = sha1(serialize($values));

            // Check if the hash has already been recorded
            if (isset($seen[$currKey])) {
                $invalid_rows[$invalid_row_count]['same-data'] = "No Two or more rows with same Zone + Package +Unions within a Start and End Date can Exist in Csv ";
            }

            // Set the hash into the hash buffer
            $seen[$currKey] = true;

            // Doubles as the zero-based index of the next data row.
            $invalid_row_count++;
        }

        fclose($file);

        return ['invalid_row_count' => $invalid_row_count, 'invalid_rows' => $invalid_rows];
    }
}
// Create a file with some test data
$fileName = 'test.csv';
$fh = fopen($fileName, 'w+');
if ($fh === false) {
    // Without a writable file there is nothing to validate.
    throw new RuntimeException('Unable to create ' . $fileName);
}
$testData = [
    ['Start Date', 'End Date', 'Local Union', 'Package', 'Zone'],
    ['2023-01-01', '2023-12-31', '556', '45', '15'],
    ['2023-02-01', '2024-02-01', '762', '51', '47'],
    ['2023-03-01', '2024-03-01', '545', '39', '74'],
    ['2023-02-01', '2024-02-01', '762', '51', '47'], // Duplicate in fourth row
];
foreach ($testData as $currRow) {
    fputcsv($fh, $currRow);
}
// Release the write handle before the validator opens the file for reading.
fclose($fh);

// Run the test
$test = new Test();
$result = $test->validateWagerows($fileName);

// Make sure we caught the last row (data rows are numbered from 0, so the
// fourth data row has key 3)
if (!array_key_exists(3, $result['invalid_rows'])) {
    echo 'The fourth row should have been flagged' . PHP_EOL;
}
print_r($result);