# Upload a document to HDFS with Python + Hadoop (pyhdfs)
import os
from pyhdfs import HdfsClient

def upload_to_hdfs(file_path, hdfs_hosts='localhost:9870'):
    # pyhdfs expects "host:port" pairs, not a full http:// URL
    client = HdfsClient(hosts=hdfs_hosts)
    with open(file_path, 'rb') as f:
        # Store the file under /shared_docs, keyed by its base name
        client.create('/shared_docs/' + os.path.basename(file_path), f.read())

if __name__ == "__main__":
    upload_to_hdfs('example.docx')
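For completeness, the reverse operation: a minimal sketch of reading a shared document back out of HDFS. It assumes the same pyhdfs client and /shared_docs layout used above; the helper name read_from_hdfs is illustrative.

# Read a shared document back from HDFS (sketch; assumes the layout above)
from pyhdfs import HdfsClient

def read_from_hdfs(file_name, hdfs_hosts='localhost:9870'):
    client = HdfsClient(hosts=hdfs_hosts)
    # open() returns a file-like object streaming the file's contents
    f = client.open('/shared_docs/' + file_name)
    return f.read()

if __name__ == "__main__":
    data = read_from_hdfs('example.docx')
    print(len(data), "bytes")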
# Store document metadata in MongoDB
from pymongo import MongoClient

def init_db():
    client = MongoClient('mongodb://localhost:27017/')
    return client['doc_shared']

def add_doc_metadata(db, doc_id, user, permission):
    # Each record tracks who uploaded the document and its visibility
    collection = db['metadata']
    collection.insert_one(
        {'doc_id': doc_id, 'uploader': user, 'permission': permission})

if __name__ == "__main__":
    db = init_db()
    add_doc_metadata(db, "DOC001", "Alice", "public")
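A natural companion to add_doc_metadata is a lookup that enforces the stored permission field. The sketch below assumes the same doc_shared database and metadata collection; the helper name can_read and the two-tier policy (public vs. uploader-only) are illustrative placeholders, not a prescribed permission model.

# Look up a document's metadata and apply a simple permission check
# (sketch; assumes the doc_shared/metadata layout created above)
from pymongo import MongoClient

def can_read(db, doc_id, user):
    meta = db['metadata'].find_one({'doc_id': doc_id})
    if meta is None:
        return False  # unknown document
    # Placeholder policy: public docs are readable by anyone,
    # otherwise only the uploader may read
    return meta['permission'] == 'public' or meta['uploader'] == user

if __name__ == "__main__":
    db = MongoClient('mongodb://localhost:27017/')['doc_shared']
    print(can_read(db, "DOC001", "Bob"))  # True: DOC001 is public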
# Elasticsearch indexing example
from elasticsearch import Elasticsearch

def index_document(es, doc_id, content):
    # body= is the elasticsearch-py 7.x style; 8.x clients use document= instead
    es.index(index="documents", id=doc_id, body={"content": content})

if __name__ == "__main__":
    # Host-dict form targets elasticsearch-py 7.x; newer clients
    # also accept a URL string, e.g. Elasticsearch("http://localhost:9200")
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    index_document(es, "DOC001", "This is an example document.")
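Indexing is only useful with a matching query path. Below is a minimal full-text search sketch against the documents index created above, using a match query; it keeps the elasticsearch-py 7.x body= style for consistency with the indexing code, and search_documents is an illustrative helper name.

# Full-text search over the "documents" index (sketch, elasticsearch-py 7.x style)
from elasticsearch import Elasticsearch

def search_documents(es, keyword):
    result = es.search(index="documents",
                       body={"query": {"match": {"content": keyword}}})
    # Return (doc_id, relevance score) pairs from the hit list
    return [(hit["_id"], hit["_score"]) for hit in result["hits"]["hits"]]

if __name__ == "__main__":
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    print(search_documents(es, "example"))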