34 lines
1.1 KiB
Python
34 lines
1.1 KiB
Python
import pandas as pd
|
||
|
||
def read_and_print_parquet_row(file_path: str, row_index: int = 0) -> None:
    """Read a Parquet file and print one row of it.

    The row is selected positionally via ``DataFrame.iloc``, so a negative
    ``row_index`` counts back from the end (``-1`` is the last row).  Every
    failure is reported on stdout instead of being raised to the caller.

    Args:
        file_path: Path of the Parquet file to read.
        row_index: Position of the row to print (defaults to row 0).
    """
    try:
        # Load the Parquet file into a DataFrame.
        df = pd.read_parquet(file_path)

        # Validate both bounds: an index past either end would otherwise
        # surface as a generic IndexError from ``iloc`` further down.
        row_count = len(df)
        if not -row_count <= row_index < row_count:
            print(f"错误: 行索引 {row_index} 超出范围(文件共有 {row_count} 行)")
            return

        # Print the requested row.
        print(f"文件: {file_path}")
        print(f"第 {row_index} 行数据:\n{'-'*30}")
        print(df.iloc[row_index])

    except FileNotFoundError:
        print(f"错误: 文件 {file_path} 不存在")
    except Exception as e:
        # Top-level boundary of this helper: report the problem, don't crash.
        print(f"读取失败: {e}")
|
||
|
||
# Example usage
if __name__ == "__main__":
    # Replace with the path to your own Parquet file.
    file_path = "/home/jgl20/LYT/work/data/data/chunk-000/episode_000000.parquet"
    read_and_print_parquet_row(file_path, row_index=0)  # print row 0
|