10
10
import sys
11
11
import tempfile
12
12
import textwrap
13
- from turtle import TurtleScreenBase
13
+ import glob
14
14
15
15
from .handler import _check_log_handler
16
16
from .pandoc_download import DEFAULT_TARGET_FOLDER , download_pandoc
@@ -68,12 +68,12 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin
68
68
cworkdir = cworkdir )
69
69
70
70
71
- def convert_file (source_file :str , to :str , format :Union [str , None ]= None , extra_args :Iterable = (), encoding :str = 'utf-8' ,
71
+ def convert_file (source_file :Union [ list , str ] , to :str , format :Union [str , None ]= None , extra_args :Iterable = (), encoding :str = 'utf-8' ,
72
72
outputfile :Union [None , str ]= None , filters :Union [Iterable , None ]= None , verify_format :bool = True ,
73
73
sandbox :bool = True , cworkdir :Union [str , None ]= None ) -> str :
74
74
"""Converts given `source` from `format` to `to`.
75
75
76
- :param str source_file: file path (see encoding)
76
+ :param (str, list) source_file: Either a full file path, a relative file path, a file pattern (like dir/*.md), or a list of files or file patterns.
77
77
78
78
:param str to: format into which the input should be converted; can be one of
79
79
`pypandoc.get_pandoc_formats()[1]`
@@ -107,14 +107,39 @@ def convert_file(source_file:str, to:str, format:Union[str, None]=None, extra_ar
107
107
"""
108
108
if not _identify_path (source_file ):
109
109
raise RuntimeError ("source_file is not a valid path" )
110
- format = _identify_format_from_path (source_file , format )
111
- return _convert_input (source_file , format , 'path' , to , extra_args = extra_args ,
110
+ if _is_network_path (source_file ): # if the source_file is an url
111
+ format = _identify_format_from_path (source_file , format )
112
+ return _convert_input (source_file , format , 'path' , to , extra_args = extra_args ,
112
113
outputfile = outputfile , filters = filters ,
113
114
verify_format = verify_format , sandbox = sandbox ,
114
115
cworkdir = cworkdir )
115
116
117
+ discovered_source_files = []
118
+ if isinstance (source_file , str ):
119
+ discovered_source_files += glob .glob (source_file )
120
+ if isinstance (source_file , list ): # a list of possibly file or file patterns. Expand all with glob
121
+ for filepath in source_file :
122
+ discovered_source_files .extend (glob .glob (filepath ))
123
+ if len (discovered_source_files ) == 1 : # behavior for a single file or a pattern
124
+ format = _identify_format_from_path (discovered_source_files [0 ], format )
125
+ return _convert_input (discovered_source_files [0 ], format , 'path' , to , extra_args = extra_args ,
126
+ outputfile = outputfile , filters = filters ,
127
+ verify_format = verify_format , sandbox = sandbox ,
128
+ cworkdir = cworkdir )
129
+ else : # behavior for multiple files or file patterns
130
+ format = _identify_format_from_path (discovered_source_files [0 ], format )
131
+ return _convert_input (discovered_source_files , format , 'path' , to , extra_args = extra_args ,
132
+ outputfile = outputfile , filters = filters ,
133
+ verify_format = verify_format , sandbox = sandbox ,
134
+ cworkdir = cworkdir )
116
135
117
- def _identify_path (source :str ) -> bool :
136
+
137
+ def _identify_path (source ) -> bool :
138
+ if isinstance (source , list ):
139
+ for single_source in source :
140
+ if not _identify_path (single_source ):
141
+ return False
142
+ return True
118
143
is_path = False
119
144
try :
120
145
is_path = os .path .exists (source )
@@ -124,6 +149,15 @@ def _identify_path(source:str) -> bool:
124
149
# still false
125
150
pass
126
151
152
+ if not is_path :
153
+ try :
154
+ is_path = len (glob .glob (source )) >= 1
155
+ except UnicodeEncodeError :
156
+ is_path = len (glob .glob (source .encode ('utf-8' ))) >= 1
157
+ except : # noqa
158
+ # still false
159
+ pass
160
+
127
161
if not is_path :
128
162
try :
129
163
# check if it's an URL
@@ -140,6 +174,21 @@ def _identify_path(source:str) -> bool:
140
174
141
175
return is_path
142
176
177
+ def _is_network_path (source ):
178
+ try :
179
+ # check if it's an URL
180
+ result = urlparse (source )
181
+ if result .scheme in ["http" , "https" ]:
182
+ return True
183
+ elif result .scheme and result .netloc and result .path :
184
+ # complete uri including one with a network path
185
+ return True
186
+ elif result .scheme == "file" and result .path :
187
+ return os .path .exists (url2path (source ))
188
+ except AttributeError :
189
+ pass
190
+ return False
191
+
143
192
144
193
def _identify_format_from_path (sourcefile :str , format :str ) -> str :
145
194
return format or os .path .splitext (sourcefile )[1 ].strip ('.' )
@@ -242,7 +291,13 @@ def _convert_input(source, format, input_type, to, extra_args=(),
242
291
to = normalize_format (to )
243
292
244
293
string_input = input_type == 'string'
245
- input_file = [source ] if not string_input else []
294
+ if not string_input :
295
+ if isinstance (source , str ):
296
+ input_file = [source ]
297
+ else :
298
+ input_file = source
299
+ else :
300
+ input_file = []
246
301
args = [__pandoc_path , '--from=' + format ]
247
302
248
303
args .append ('--to=' + to )
@@ -294,11 +349,12 @@ def _convert_input(source, format, input_type, to, extra_args=(),
294
349
p .stderr .read ())
295
350
)
296
351
297
- try :
298
- source = cast_bytes (source , encoding = 'utf-8' )
299
- except (UnicodeDecodeError , UnicodeEncodeError ):
300
- # assume that it is already a utf-8 encoded string
301
- pass
352
+ if string_input :
353
+ try :
354
+ source = cast_bytes (source , encoding = 'utf-8' )
355
+ except (UnicodeDecodeError , UnicodeEncodeError ):
356
+ # assume that it is already a utf-8 encoded string
357
+ pass
302
358
try :
303
359
stdout , stderr = p .communicate (source if string_input else None )
304
360
except OSError :
0 commit comments