init repo

This commit is contained in:
2025-04-05 21:46:49 +08:00
parent 4b58b22868
commit 91c2b7b0cb
17 changed files with 2473 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
import pandas as pd
def read_and_print_parquet_row(file_path, row_index=0):
    """Read a Parquet file and print a single row of it.

    Args:
        file_path (str): Path of the Parquet file to read.
        row_index (int): Positional index of the row to print. Defaults to 0.
            Negative values count from the end, as with ``DataFrame.iloc``.

    Returns:
        None. The row, or an error message, is written to standard output;
        no exception is propagated to the caller.
    """
    try:
        # Load the whole Parquet file into a DataFrame.
        df = pd.read_parquet(file_path)
        row_count = len(df)

        # Validate both ends of the range. Checking only the upper bound would
        # let an out-of-range negative index reach ``iloc`` and surface as a
        # generic read failure instead of a clear out-of-range message.
        if not -row_count <= row_index < row_count:
            print(f"错误: 行索引 {row_index} 超出范围(文件共有 {row_count} 行)")
            return

        # Print the requested row.
        print(f"文件: {file_path}")
        print(f"{row_index} 行数据:\n{'-'*30}")
        print(df.iloc[row_index])
    except FileNotFoundError:
        print(f"错误: 文件 {file_path} 不存在")
    except Exception as e:
        # Deliberate best-effort boundary: this helper reports any other
        # failure (missing engine, corrupt file, bad index type) on stdout
        # rather than raising.
        print(f"读取失败: {e}")
# Example usage
if __name__ == "__main__":
    # Replace with the path to your own Parquet file. The variable is passed
    # to the call below, so editing it here is all that is needed.
    file_path = "/home/jgl20/LYT/work/data/data/chunk-000/episode_000000.parquet"
    read_and_print_parquet_row(file_path, row_index=0)  # print row 0