feisty meow concerns codebase  2.140
filename_helper.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
3 """
4 
5 Name : filename helper
6 Author : Chris Koeritz
7 Rights : Copyright (C) 1996-$now by Author
8 
9 Purpose:
10 
11  Functions that manipulate filenames in various helpful ways.
12 
13 License:
14 This program is free software; you can redistribute it and/or modify it
15 under the terms of the GNU General Public License as published by the Free
16 Software Foundation; either version 2 of the License or (at your option)
17 any later version. See: "http://www.gruntose.com/Info/GNU/GPL.html" for a
18 version of the License. Please send any updates to "fred@gruntose.com".
19 
20 """
21 
import os
import re

import path
24 
25 
26 
27 #hmmm: lots of interesting perl interrupt handling stuff. do we need any of that? betting not.
28 
41 
42 
43 
44 # takes an array of filenames (each possibly containing spaces and/or
45 # wildcards) and resolves it to a useful list of actual files.
def _wildcard_to_regex(file_spec: str):
    """Compile a file spec in which '*' is the only wildcard into a regex.

    Every other character, including '?', '[' and '.', is matched literally.
    """
    literal_parts = (re.escape(part) for part in file_spec.split("*"))
    return re.compile(".*".join(literal_parts))


def glob_list(original_names: list) -> list:
    """
    Takes a set of filenames that may be relative (or really arcane) and globs
    them into a normal list of filenames.

    Each entry is split with split_filename() into a directory part and a
    file specification.  The directory is listed and every directory entry
    whose whole name matches the specification is returned, joined onto the
    directory part.  Only '*' acts as a wildcard in the specification.

    Special cases:
      * a bare "." or ".." is returned as is.
      * an entry whose file specification is "." (the "no specification"
        marker that split_filename() produces) contributes just its directory.
      * a directory that cannot be listed contributes nothing.

    Args:
        original_names: the filenames or wildcard patterns to resolve.

    Returns:
        The list of names found, in the order the entries were given.
    """
    to_return = []  # the final form of the name list.

    for entry in original_names:
        directory, file_spec = split_filename(entry)

        # a bare "." or ".." comes back from split_filename with an empty
        # file part; requiring that keeps "./foo" from being mistaken for ".".
        if directory in (".", "..") and not file_spec:
            to_return.append(directory)
            continue

        if file_spec == ".":
            # a directory that didn't have more pattern attached.
            to_return.append(directory)
            continue

        # get all the contents from the directory (both subdirectories and files).
        try:
            files_found = os.listdir(directory)
        except OSError:
            # unreadable or missing directory; nothing can match in it.
            continue

        # the pattern only depends on the entry, so build it once per entry.
        matcher = _wildcard_to_regex(file_spec)
        to_return.extend(
            # join copes with a directory part with or without a trailing slash.
            os.path.join(directory, possible_name)
            for possible_name in files_found
            # fullmatch makes sure that we match against the whole name.
            if matcher.fullmatch(possible_name)
        )

    return to_return
106 
107 
108 
109 
110 # reports if two file names are the same file.
def same_file(file1: str, file2: str) -> bool:
    """
    Reports if two file names are the same file.

    Both names are stat'ed and considered the same when their inode and
    device numbers agree.

    Args:
        file1: the first path to compare.
        file2: the second path to compare.

    Returns:
        True if both paths refer to the same file; False if they differ or if
        either one cannot be stat'ed.
    """
    try:
        return os.path.samestat(os.stat(file1), os.stat(file2))
    except (OSError, ValueError):
        # a missing or inaccessible file, or a malformed path, is never "the same".
        return False
118 
119 
120 
121 # splits a filename into a directory and file specification.
def split_filename(pathname: str) -> tuple:
    """
    Splits a filename into a directory and a file specification.

    The name is first cleaned up with remove_trailing_slashes(),
    canonicalize() and patch_name_for_pc().  The directory part always keeps
    its trailing slash (e.g. "a/b/"), matching the "./" returned for names
    without any directory, so the two parts can be glued back together.

    Args:
        pathname: the path to split.

    Returns:
        A (directory, file specification) pair:
          * "a/b/c"  -> ("a/b/", "c")
          * a name ending in a slash after clean-up -> (name, "."), where "."
            is a specification that does not match anything.
          * "." or ".." -> (".", "") or ("..", "")
          * "name"   -> ("./", "name")
    """
    chewed_name = remove_trailing_slashes(pathname)
    chewed_name = canonicalize(chewed_name)
    chewed_name = patch_name_for_pc(chewed_name)

    if "/" in chewed_name:
        # there's a slash in there; cut at the last one and keep it on the
        # directory side.
        directory_part, slash, file_part = chewed_name.rpartition("/")
        # if there was no file specification, just add a non-matching spec.
        return directory_part + slash, file_part or "."

    if chewed_name in (".", ".."):
        # simple comparison to the current or parent directory.
        return chewed_name, ""

    # no slash in this name, so we fix that and also make sure we match
    # the whole name.
    return "./", chewed_name
144 
145 
146 
147 #hmmm: kind of legacy to provide our own dirname and basename, but we're
148 # just migrating this code right now, not perfecting it.
149 
150 # returns the directory part of the filename.
def dirname(pathname: str) -> str:
    """Return the directory part that split_filename() reports for *pathname*."""
    directory_part = split_filename(pathname)[0]
    return directory_part
153 
154 # returns the base part of the filename; this omits any directories.
def basename(pathname: str) -> str:
    """Return the file part that split_filename() reports for *pathname*.

    This omits any directories.
    """
    file_part = split_filename(pathname)[1]
    return file_part
157 
158 # returns the extension found on the filename, if any.
def extension(pathname: str) -> str:
    """
    Returns the extension found on the filename, if any.

    Only the base name (see basename()) is examined, so periods in directory
    names are ignored.

    Args:
        pathname: the path whose extension is wanted.

    Returns:
        The text from the last period of the base name to its end, including
        the period itself (e.g. ".txt"), or "" when the base name has no period.
    """
    base = basename(pathname)
    # work backwards from the end of the base name to the last period.
    found = base.rfind(".")
    if found >= 0:
        return base[found:]
    return ""  # no extension seen.
173 
174 # returns the portion of the filename without the extension.
def non_extension(pathname: str) -> str:
    """
    Return the cleaned-up form of *pathname* with its extension chopped off.

    The name goes through remove_trailing_slashes(), canonicalize() and
    patch_name_for_pc() first; then as many characters as extension() reports
    are dropped from the end.
    """
    cleaned = patch_name_for_pc(canonicalize(remove_trailing_slashes(pathname)))
    suffix_length = len(extension(cleaned))
    return cleaned[0 : len(cleaned) - suffix_length]
182 
183 
184 
185 # removes all directory slashes (either '/' or '\') from the end of a string.
def remove_trailing_slashes(pathname: str) -> str:
    """
    Removes all directory slashes (either '/' or '\\') from the end of a string.

    The first character is never removed, so a name made only of slashes
    (such as the root directory "/") keeps a single slash.

    Args:
        pathname: the path to trim.

    Returns:
        The path without its trailing slashes.
    """
    keep = len(pathname)
    # walk back from the end while the last kept character is a slash, but
    # always leave the first character in place.
    while keep > 1 and pathname[keep - 1] in ("/", "\\"):
        keep -= 1
    return pathname[:keep]
199 
200 
201 
202 # returns the proper directory separator for this platform. this requires
203 # an environment variable called "OS" for non-Unix operating systems. the
204 # valid values for that are shown below.
def directory_separator() -> str:
    """
    Returns the proper directory separator for this platform.

    The platform is taken from the "OS" environment variable, which must be
    set on non-Unix operating systems.

    Returns:
        A backslash when OS is "Windows_NT", "Windows_95", "DOS" or "OS2";
        a forward slash otherwise, including when OS is not set.
    """
    if os.environ.get("OS", "") in ("Windows_NT", "Windows_95", "DOS", "OS2"):
        return "\\"
    return "/"
209 
210 
211 
212 # these mutate the directory slashes in a directory name.
213 
214 # the one we use most frequently; it uses the unix slash.
def canonicalize(pathname: str) -> str:
    """Return *pathname* run through canonicalizer() with the unix slash '/'."""
    unix_slash = '/'
    return canonicalizer(pathname, unix_slash)
217 
218 # one that turns names into the style native on the current platform.
def native_canonicalize(pathname: str) -> str:
    """
    Turns a name into the style native on the current platform.

    Args:
        pathname: the path to convert.

    Returns:
        The path run through canonicalizer() with the separator reported by
        directory_separator().
    """
    return canonicalizer(pathname, directory_separator())
221 
222 # one that explicitly uses pc style back-slashes.
def pc_canonicalize(pathname: str) -> str:
    """Return *pathname* run through canonicalizer() with the pc style back-slash."""
    back_slash = '\\'
    return canonicalizer(pathname, back_slash)
225 
226 # one that explicitly does unix style forward slashes.
def unix_canonicalize(pathname: str) -> str:
    """Return *pathname* run through canonicalizer() with the unix style forward slash."""
    forward_slash = '/'
    return canonicalizer(pathname, forward_slash)
229 
230 
231 # this more general routine gets a directory separator passed in. it then
232 # replaces all the separators with that one.
def canonicalizer(directory_name: str, dirsep: str) -> str:
    """
    Replaces all the directory separators in a name with the one passed in.

    Steps, in order:
      1. When the "OS" environment variable contains "win" (any case), a
         leading drive spec "X:/" or "X:\\" is rewritten to "/X/" if the
         "IS_MSYS" environment variable is non-empty (msys style), otherwise
         to "/cygdrive/X/" (cygwin style).
      2. Every '/' and '\\' becomes dirsep.
      3. Runs of repeated separators are squeezed to one.  The very first
         character is left out of the squeeze, so a leading double separator
         (which could be a UNC filename) survives.
      4. A name that is just a drive letter and colon ("X:") gets a '/'
         appended, so it means the root of the drive rather than whatever the
         current directory on that drive happens to be.

    Args:
        directory_name: the path to normalize.
        dirsep: the separator to use; expected to be a single character
            (step 3 is skipped for anything else).

    Returns:
        The normalized path.
    """
    # somewhat abbreviated check; only catches windoze systems, not dos or os2.
    # NOTE(review): OS and IS_MSYS are read from the environment here; confirm
    # that is where the feisty meow startup scripts publish them.
    if re.search("win", os.environ.get("OS", ""), re.IGNORECASE):
        if os.environ.get("IS_MSYS", ""):
            # msys utilities version (http://www.mingw.org)
            drive_prefix = "/"
        else:
            # cygwin utilities version (http://www.cygwin.com)
            drive_prefix = "/cygdrive/"
        directory_name = re.sub(
            r"^(.):[\\/](.*)$", drive_prefix + r"\1/\2", directory_name, count=1
        )

    # turn all the non-default separators into the default.  a callable is
    # used as the replacement so a backslash separator is taken literally.
    directory_name = re.sub(r"[\\/]", lambda _match: dirsep, directory_name)

    # remove all occurrences of double separators except for the first
    # double set, which could be a UNC filename.
    if len(dirsep) == 1:
        repeated = re.compile("(?:" + re.escape(dirsep) + "){2,}")
        directory_name = directory_name[:1] + repeated.sub(
            lambda _match: dirsep, directory_name[1:]
        )

    if re.search(r"^.:$", directory_name):
        # fix a dos style directory that's just X:, since we don't want the
        # current directory to be used on that device.  that's too random.
        # instead, we assume they meant the root of the drive.
        directory_name += "/"

    return directory_name
288 
289 
290 
291 # fixes a PC directory name if it is only a drive letter plus colon.
292 
def patch_name_for_pc(name: str) -> str:
    """
    Fixes a PC directory name if it is only a drive letter plus colon.

    Args:
        name: the path to inspect.

    Returns:
        name with a '/' appended when it is exactly two characters long and
        the second one is a colon (the feeble form "X:"); otherwise name
        unchanged.
    """
    if len(name) == 2 and name[1] == ":":
        return name + "/"
    return name
307 
308 
309 
310 # tells whether a filename is important or not. the unimportant category
311 # can usually be safely ignored or deleted.
312 
# these are endings that we consider unimportant.  where a caret is used at
# the front, we will match only the whole string.  backslashes are used before
# periods to ensure we match a real period character.  each pattern is
# anchored at the end of the name and compared without regard to case.
_JUNK_FILE_PATTERNS = tuple(
    re.compile(ending + "$", re.IGNORECASE)
    for ending in (
        r"~", r"^\.#.*", r"^\._.*", r"\.aps", r"\.bak",
        r"^binaries", r"^bin.ant", r"^bin.eclipse",
        r"\.clw", r"^cpdiff_tmp\.txt", r"^\.ds_store", r"^diffs\.txt",
        r"^diff_tmp\.txt", r"\.dsp", r"\.dsw", r"\.gid", r"gmon\.out", r"\.isr",
        r"^isconfig\.ini", r"\.log", r"^manifest.txt", r"^obj",
        r"\.obj", r"\.output", r"\.plg", r"^RCa.*", r"^Release", r"\.res",
        r"\.sbr", r".*scc", r"^Setup\.dbg", r"^Setup\.inx",
        r"^Setup\.map", r"^Setup\.obs", r"^Selenium_.*Login.html",
        r"\.stackdump", r"^string1033\.txt", r"\.suo", r"\.swp",
        r"^thumbs.db", r"[a-zA-Z0-9]\.tmp", r"^trans\.tbl", r"\.user",
        r"_version\.h", r"_version\.rc", r"^waste", r"\.ws4", r"\.wsm",
    )
)


def important_filename(pathname: str) -> bool:
    """
    Tells whether a filename is important or not.  The unimportant category
    can usually be safely ignored or deleted.

    Only the base name (see basename()) is checked against the list of junk
    file patterns.

    Args:
        pathname: the file name or path to classify.

    Returns:
        False when the base name matches one of the junk patterns; True for
        anything else.
    """
    name = basename(pathname)
    return not any(junk.search(name) for junk in _JUNK_FILE_PATTERNS)
339 
340 
341 
def sanitize_name(pathname: str) -> str:
    """
    Cleans up a path name.

    Args:
        pathname: the path to clean.

    Returns:
        The path after canonicalize(), then remove_trailing_slashes(), then
        patch_name_for_pc() have been applied in that order.
    """
    return patch_name_for_pc(remove_trailing_slashes(canonicalize(pathname)))
347 
348 
349 
def get_drive_letter(pathname: str) -> str:
    """
    Returns the drive letter at the front of a path.

    Args:
        pathname: the path to inspect.

    Returns:
        The first character when it is an ASCII letter followed by a colon
        (as in "c:/temp"); "" otherwise.
    """
    has_drive = (
        len(pathname) >= 2
        and pathname[1] == ":"
        # isascii keeps this to a-z / A-Z only.
        and pathname[0].isascii()
        and pathname[0].isalpha()
    )
    return pathname[0] if has_drive else ""
357 
358 
359 
def remove_drive_letter(pathname: str) -> str:
    """
    Strips a leading drive letter and colon from a path.

    Args:
        pathname: the path to inspect.

    Returns:
        Everything after the colon when the path starts with an ASCII letter
        followed by a colon (as in "c:/temp" -> "/temp"); otherwise the path
        unchanged.
    """
    has_drive = (
        len(pathname) >= 2
        and pathname[1] == ":"
        # isascii keeps this to a-z / A-Z only.
        and pathname[0].isascii()
        and pathname[0].isalpha()
    )
    return pathname[2:] if has_drive else pathname
367 
368 
369 
370 # these return their argument with the case flipped to lower or upper case.
371 
# maps only the ASCII letters A-Z onto a-z; everything else is left alone.
_UPPER_TO_LOWER = str.maketrans(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"
)


def lower(name: str) -> str:
    """
    Returns the argument with its case flipped to lower case.

    Args:
        name: the text to convert.

    Returns:
        name with each of the letters A-Z replaced by a-z.  Characters outside
        that range, accented letters included, are not changed.
    """
    return name.translate(_UPPER_TO_LOWER)
377 
# maps only the ASCII letters a-z onto A-Z; everything else is left alone.
_LOWER_TO_UPPER = str.maketrans(
    "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)


def upper(name: str) -> str:
    """
    Returns the argument with its case flipped to upper case.

    Args:
        name: the text to convert.

    Returns:
        name with each of the letters a-z replaced by A-Z.  Characters outside
        that range, accented letters included, are not changed.
    """
    return name.translate(_LOWER_TO_UPPER)
383 
384 
385 
386 # recursively deletes a directory that is passed as the single parameter.
387 # from http://developer.novell.com/wiki/index.php/Recursive_Directory_Remove
def recursive_delete(*directories: str) -> None:
    """
    Recursively deletes each directory that is passed as a parameter.

    For every directory, plain files and symbolic links inside it are
    unlinked, subdirectories are deleted recursively, and finally the
    directory itself is removed.  Symbolic links are removed as links and are
    never followed, so nothing outside the given trees is touched.

    Behavior on problems:
      * if an argument turns out to be a plain file, a message is printed and
        the whole call stops there; the remaining arguments are not processed.
      * a directory that can't be listed is skipped.
      * a file that can't be unlinked is left behind silently.
      * a failure to remove the directory itself is printed as "error - ...".

    Args:
        *directories: the directories to delete.
    """
    for directory in directories:
        if os.path.isfile(directory):
            print("this is not a dir: " + directory + " => should whack it here?")
            return

        # if we can't open the dir, just skip to the next one.
        try:
            entries = os.listdir(directory)
        except OSError:
            continue

        for entry in entries:
            full_path = directory + "/" + entry
            if os.path.islink(full_path) or os.path.isfile(full_path):
                try:
                    os.unlink(full_path)
                except OSError:
                    # best effort; the rmdir below will report the leftover.
                    pass
            elif os.path.isdir(full_path):
                recursive_delete(full_path)

        try:
            os.rmdir(directory)
        except OSError as problem:
            print("error - " + str(problem.strerror))
409 
410 
411 
412 # finds any directories under the arguments, which can be a list of directories.
def find_directories(*directories: str) -> list:
    """
    Finds any directories directly under the arguments, which can be several
    directories.

    Args:
        *directories: the directories to look inside.

    Returns:
        A list of "<directory>/<entry>" paths, one for each entry that is
        itself a directory.  Arguments that can't be listed contribute
        nothing.  The search does not descend below the first level.
    """
    dirs_found = []
    for directory in directories:
        # if we can't open the dir, just skip to the next one.
        try:
            entries = os.listdir(directory)
        except OSError:
            continue
        # os.listdir never reports the current or parent dir, so only the
        # "is it a directory" check is needed.
        candidates = (directory + "/" + entry for entry in entries)
        dirs_found.extend(path_found for path_found in candidates if os.path.isdir(path_found))
    return dirs_found
432 
433 
434 
435 # given a list of paths, this returns an array of all the filenames found therein.
def find_files(*paths: str) -> list:
    """
    Given some paths, this returns a list of all the filenames found therein.

    Args:
        *paths: directories to look inside, or plain files.

    Returns:
        A list holding, for each argument in order: the argument itself when
        it is a plain file, otherwise a "<directory>/<entry>" path for each
        entry of the directory that is a plain file.  Arguments that can't be
        listed contribute nothing.  The search does not descend into
        subdirectories.
    """
    files_found = []
    for location in paths:
        if os.path.isfile(location):
            # that's actually just a file, so add it.
            files_found.append(location)
            continue

        # if we can't open the dir, just skip to the next one.
        try:
            entries = os.listdir(location)
        except OSError:
            continue

        candidates = (location + "/" + entry for entry in entries)
        files_found.extend(path_found for path_found in candidates if os.path.isfile(path_found))
    return files_found
460 
461 
462 
463 # finds all directories starting at a particular directory and returns them
464 # in an array. does not include the starting directory.
def recursive_find_directories(*directories: str) -> list:
    """
    Finds all directories below the starting directories and returns them in
    a list.  Does not include the starting directories themselves.

    Args:
        *directories: the directories to start from.

    Returns:
        The directories found by find_directories() at the first level,
        followed by everything found by recursing on those.
    """
    # first find all the directories within the parameters.
    toplevel = find_directories(*directories)
    to_return = list(toplevel)

    # we only recurse if there's something to chew on in our directory list.
    # otherwise, we've hit the bottom of that tree.
    if toplevel:
        to_return.extend(recursive_find_directories(*toplevel))
    return to_return
481 
482 
483 
484 1;
485 
#define stat
Definition: Xos2defs.h:41
bool same_file(str file1, str file2)
list glob_list(list original_names)
#hmmm: make this lower-level, a script that is inherited by all perl scripts.
str native_canonicalize(str pathname)
str basename(str pathname)
str dirname(str pathname)
str pc_canonicalize(str pathname)
list split_filename(str pathname)
str canonicalizer(str directory_name, str dirsep)
str canonicalize(str pathname)
str unix_canonicalize(str pathname)
str remove_trailing_slashes(str pathname)
str non_extension(str pathname)
str extension(str pathname)