LLVM OpenMP* Runtime Library
extractExternal.cpp
1 /*
2  * extractExternal.cpp
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stdlib.h>
17 #include <iostream>
18 #include <strstream>
19 #include <fstream>
20 #include <string>
21 #include <set>
22 #include <map>
23 
24 /* Given a set of n object files h ('external' object files) and a set of m
25  object files o ('internal' object files),
26  1. Determines r, the subset of h that o depends on, directly or indirectly
27  2. Removes the files in h - r from the file system
28  3. For each external symbol defined in some file in r, rename it in r U o
29  by prefixing it with "__kmp_external_"
30  Usage:
31  hide.exe <n> <filenames for h> <filenames for o>
32 
33  Thus, the prefixed symbols become hidden in the sense that they now have a special
34  prefix.
35 */
36 
37 using namespace std;
38 
39 void stop(char* errorMsg) {
40  printf("%s\n", errorMsg);
41  exit(1);
42 }
43 
44 // an entry in the symbol table of a .OBJ file
45 class Symbol {
46 public:
47  __int64 name;
48  unsigned value;
49  unsigned short sectionNum, type;
50  char storageClass, nAux;
51 };
52 
53 class _rstream : public istrstream {
54 private:
55  const char *buf;
56 protected:
57  _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
58  ~_rstream() {
59  delete[]buf;
60  }
61 };
62 
63 /* A stream encapuslating the content of a file or the content of a string, overriding the
64  >> operator to read various integer types in binary form, as well as a symbol table
65  entry.
66 */
67 class rstream : public _rstream {
68 private:
69  template<class T>
70  inline rstream& doRead(T &x) {
71  read((char*)&x, sizeof(T));
72  return *this;
73  }
74  static pair<const char*, streamsize> getBuf(const char *fileName) {
75  ifstream raw(fileName,ios::binary | ios::in);
76  if(!raw.is_open())
77  stop("rstream.getBuf: Error opening file");
78  raw.seekg(0,ios::end);
79  streampos fileSize = raw.tellg();
80  if(fileSize < 0)
81  stop("rstream.getBuf: Error reading file");
82  char *buf = new char[fileSize];
83  raw.seekg(0,ios::beg);
84  raw.read(buf, fileSize);
85  return pair<const char*, streamsize>(buf,fileSize);
86  }
87 public:
88  // construct from a string
89  rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
90  /* construct from a file whole content is fully read once to initialize the content of
91  this stream
92  */
93  rstream(const char *fileName):_rstream(getBuf(fileName)){}
94  rstream& operator>>(int &x) {
95  return doRead(x);
96  }
97  rstream& operator>>(unsigned &x) {
98  return doRead(x);
99  }
100  rstream& operator>>(short &x) {
101  return doRead(x);
102  }
103  rstream& operator>>(unsigned short &x) {
104  return doRead(x);
105  }
106  rstream& operator>>(Symbol &e) {
107  read((char*)&e, 18);
108  return *this;
109  }
110 };
111 
112 // string table in a .OBJ file
113 class StringTable {
114 private:
115  map<string, unsigned> directory;
116  size_t length;
117  char *data;
118 
119  // make <directory> from <length> bytes in <data>
120  void makeDirectory(void) {
121  unsigned i = 4;
122  while(i < length) {
123  string s = string(data + i);
124  directory.insert(make_pair(s, i));
125  i += s.size() + 1;
126  }
127  }
128  // initialize <length> and <data> with contents specified by the arguments
129  void init(const char *_data) {
130  unsigned _length = *(unsigned*)_data;
131 
132  if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
133  stop("StringTable.init: Invalid symbol table");
134  if(_data[_length - 1]) {
135  // to prevent runaway strings, make sure the data ends with a zero
136  data = new char[length = _length + 1];
137  data[_length] = 0;
138  } else {
139  data = new char[length = _length];
140  }
141  *(unsigned*)data = length;
142  KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
143  length - sizeof(unsigned));
144  makeDirectory();
145  }
146 public:
147  StringTable(rstream &f) {
148  /* Construct string table by reading from f.
149  */
150  streampos s;
151  unsigned strSize;
152  char *strData;
153 
154  s = f.tellg();
155  f>>strSize;
156  if(strSize < sizeof(unsigned))
157  stop("StringTable: Invalid string table");
158  strData = new char[strSize];
159  *(unsigned*)strData = strSize;
160  // read the raw data into <strData>
161  f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
162  s = f.tellg() - s;
163  if(s < strSize)
164  stop("StringTable: Unexpected EOF");
165  init(strData);
166  delete[]strData;
167  }
168  StringTable(const set<string> &strings) {
169  /* Construct string table from given strings.
170  */
171  char *p;
172  set<string>::const_iterator it;
173  size_t s;
174 
175  // count required size for data
176  for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
177  size_t l = (*it).size();
178 
179  if(l > (unsigned) 0xFFFFFFFF)
180  stop("StringTable: String too long");
181  if(l > 8) {
182  length += l + 1;
183  if(length > (unsigned) 0xFFFFFFFF)
184  stop("StringTable: Symbol table too long");
185  }
186  }
187  data = new char[length];
188  *(unsigned*)data = length;
189  // populate data and directory
190  for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
191  const string &str = *it;
192  size_t l = str.size();
193  if(l > 8) {
194  directory.insert(make_pair(str, p - data));
195  KMP_MEMCPY(p, str.c_str(), l);
196  p[l] = 0;
197  p += l + 1;
198  }
199  }
200  }
201  ~StringTable() {
202  delete[] data;
203  }
204  /* Returns encoding for given string based on this string table.
205  Error if string length is greater than 8 but string is not in
206  the string table--returns 0.
207  */
208  __int64 encode(const string &str) {
209  __int64 r;
210 
211  if(str.size() <= 8) {
212  // encoded directly
213  ((char*)&r)[7] = 0;
214  KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
215  return r;
216  } else {
217  // represented as index into table
218  map<string,unsigned>::const_iterator it = directory.find(str);
219  if(it == directory.end())
220  stop("StringTable::encode: String now found in string table");
221  ((unsigned*)&r)[0] = 0;
222  ((unsigned*)&r)[1] = (*it).second;
223  return r;
224  }
225  }
226  /* Returns string represented by x based on this string table.
227  Error if x references an invalid position in the table--returns
228  the empty string.
229  */
230  string decode(__int64 x) const {
231  if(*(unsigned*)&x == 0) {
232  // represented as index into table
233  unsigned &p = ((unsigned*)&x)[1];
234  if(p >= length)
235  stop("StringTable::decode: Invalid string table lookup");
236  return string(data + p);
237  } else {
238  // encoded directly
239  char *p = (char*)&x;
240  int i;
241 
242  for(i = 0; i < 8 && p[i]; ++i);
243  return string(p, i);
244  }
245  }
246  void write(ostream &os) {
247  os.write(data, length);
248  }
249 };
250 
251 /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
252  and writes them to <defined> and <undefined> respectively
253 */
254 void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
255  streampos fileSize;
256  size_t strTabStart;
257  unsigned symTabStart, symNEntries;
258  rstream f(fileName);
259 
260  f.seekg(0,ios::end);
261  fileSize = f.tellg();
262 
263  f.seekg(8);
264  f >> symTabStart >> symNEntries;
265  // seek to the string table
266  f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
267  if(f.eof()) {
268  printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
269  fileName, (unsigned long) fileSize, symTabStart, symNEntries);
270  stop("computeExternalSymbols: Unexpected EOF 1");
271  }
272  StringTable stringTable(f); // read the string table
273  if(f.tellg() != fileSize)
274  stop("computeExternalSymbols: Unexpected data after string table");
275 
276  f.clear();
277  f.seekg(symTabStart); // seek to the symbol table
278 
279  defined->clear(); undefined->clear();
280  for(int i = 0; i < symNEntries; ++i) {
281  // process each entry
282  Symbol e;
283 
284  if(f.eof())
285  stop("computeExternalSymbols: Unexpected EOF 2");
286  f>>e;
287  if(f.fail())
288  stop("computeExternalSymbols: File read error");
289  if(e.nAux) { // auxiliary entry: skip
290  f.seekg(e.nAux * 18, ios::cur);
291  i += e.nAux;
292  }
293  // if symbol is extern and defined in the current file, insert it
294  if(e.storageClass == 2)
295  if(e.sectionNum)
296  defined->insert(stringTable.decode(e.name));
297  else
298  undefined->insert(stringTable.decode(e.name));
299  }
300 }
301 
302 /* For each occurrence of an external symbol in the object file named by
303  by <fileName> that is a member of <hide>, renames it by prefixing
304  with "__kmp_external_", writing back the file in-place
305 */
306 void hideSymbols(char *fileName, const set<string> &hide) {
307  static const string prefix("__kmp_external_");
308  set<string> strings; // set of all occurring symbols, appropriately prefixed
309  streampos fileSize;
310  size_t strTabStart;
311  unsigned symTabStart, symNEntries;
312  int i;
313  rstream in(fileName);
314 
315  in.seekg(0,ios::end);
316  fileSize = in.tellg();
317 
318  in.seekg(8);
319  in >> symTabStart >> symNEntries;
320  in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
321  if(in.eof())
322  stop("hideSymbols: Unexpected EOF");
323  StringTable stringTableOld(in); // read original string table
324 
325  if(in.tellg() != fileSize)
326  stop("hideSymbols: Unexpected data after string table");
327 
328  // compute set of occurring strings with prefix added
329  for(i = 0; i < symNEntries; ++i) {
330  Symbol e;
331 
332  in.seekg(symTabStart + i * 18);
333  if(in.eof())
334  stop("hideSymbols: Unexpected EOF");
335  in >> e;
336  if(in.fail())
337  stop("hideSymbols: File read error");
338  if(e.nAux)
339  i += e.nAux;
340  const string &s = stringTableOld.decode(e.name);
341  // if symbol is extern and found in <hide>, prefix and insert into strings,
342  // otherwise, just insert into strings without prefix
343  strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
344  prefix + s : s);
345  }
346 
347  ofstream out(fileName, ios::trunc | ios::out | ios::binary);
348  if(!out.is_open())
349  stop("hideSymbols: Error opening output file");
350 
351  // make new string table from string set
352  StringTable stringTableNew = StringTable(strings);
353 
354  // copy input file to output file up to just before the symbol table
355  in.seekg(0);
356  char *buf = new char[symTabStart];
357  in.read(buf, symTabStart);
358  out.write(buf, symTabStart);
359  delete []buf;
360 
361  // copy input symbol table to output symbol table with name translation
362  for(i = 0; i < symNEntries; ++i) {
363  Symbol e;
364 
365  in.seekg(symTabStart + i*18);
366  if(in.eof())
367  stop("hideSymbols: Unexpected EOF");
368  in >> e;
369  if(in.fail())
370  stop("hideSymbols: File read error");
371  const string &s = stringTableOld.decode(e.name);
372  out.seekp(symTabStart + i*18);
373  e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
374  prefix + s : s);
375  out.write((char*)&e, 18);
376  if(out.fail())
377  stop("hideSymbols: File write error");
378  if(e.nAux) {
379  // copy auxiliary symbol table entries
380  int nAux = e.nAux;
381  for(int j = 1; j <= nAux; ++j) {
382  in >> e;
383  out.seekp(symTabStart + (i + j) * 18);
384  out.write((char*)&e, 18);
385  }
386  i += nAux;
387  }
388  }
389  // output string table
390  stringTableNew.write(out);
391 }
392 
393 // returns true iff <a> and <b> have no common element
394 template <class T>
395 bool isDisjoint(const set<T> &a, const set<T> &b) {
396  set<T>::const_iterator ita, itb;
397 
398  for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
399  const T &ta = *ita, &tb = *itb;
400  if(ta < tb)
401  ++ita;
402  else if (tb < ta)
403  ++itb;
404  else
405  return false;
406  }
407  return true;
408 }
409 
410 /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
411  <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
412  files and the rest correspond to the internal object files.
413  postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
414  disjoint. Returns the transitive closure of the set of internal object files, as a set of
415  file indexes, under the 'depends on' relation, minus the set of internal object files.
416 */
417 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
418  set<int> *required = new set<int>;
419  set<int> fresh[2];
420  int i, cur = 0;
421  bool changed;
422 
423  for(i = nTotal - 1; i >= nExternal; --i)
424  fresh[cur].insert(i);
425  do {
426  changed = false;
427  for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
428  set<string> &s = undefined[*it];
429 
430  for(i = 0; i < nExternal; ++i) {
431  if(required->find(i) == required->end()) {
432  if(!isDisjoint(defined[i], s)) {
433  // found a new qualifying element
434  required->insert(i);
435  fresh[1 - cur].insert(i);
436  changed = true;
437  }
438  }
439  }
440  }
441  fresh[cur].clear();
442  cur = 1 - cur;
443  } while(changed);
444  return required;
445 }
446 
447 int main(int argc, char **argv) {
448  int nExternal, nInternal, i;
449  set<string> *defined, *undefined;
450  set<int>::iterator it;
451 
452  if(argc < 3)
453  stop("Please specify a positive integer followed by a list of object filenames");
454  nExternal = atoi(argv[1]);
455  if(nExternal <= 0)
456  stop("Please specify a positive integer followed by a list of object filenames");
457  if(nExternal + 2 > argc)
458  stop("Too few external objects");
459  nInternal = argc - nExternal - 2;
460  defined = new set<string>[argc - 2];
461  undefined = new set<string>[argc - 2];
462 
463  // determine the set of defined and undefined external symbols
464  for(i = 2; i < argc; ++i)
465  computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
466 
467  // determine the set of required external files
468  set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
469  set<string> hide;
470 
471  /* determine the set of symbols to hide--namely defined external symbols of the
472  required external files
473  */
474  for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
475  int idx = *it;
476  set<string>::iterator it2;
477  /* We have to insert one element at a time instead of inserting a range because
478  the insert member function taking a range doesn't exist on Windows* OS, at least
479  at the time of this writing.
480  */
481  for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
482  hide.insert(*it2);
483  }
484 
485  /* process the external files--removing those that are not required and hiding
486  the appropriate symbols in the others
487  */
488  for(i = 0; i < nExternal; ++i)
489  if(requiredExternal->find(i) != requiredExternal->end())
490  hideSymbols(argv[2 + i], hide);
491  else
492  remove(argv[2 + i]);
493  // hide the appropriate symbols in the internal files
494  for(i = nExternal + 2; i < argc; ++i)
495  hideSymbols(argv[i], hide);
496  return 0;
497 }